diff options
Diffstat (limited to 'fs')
148 files changed, 6376 insertions, 3047 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 392c5dac1981..d934f04e7736 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c | |||
@@ -184,10 +184,20 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
184 | v9ses->afid = option; | 184 | v9ses->afid = option; |
185 | break; | 185 | break; |
186 | case Opt_uname: | 186 | case Opt_uname: |
187 | match_strlcpy(v9ses->uname, &args[0], PATH_MAX); | 187 | kfree(v9ses->uname); |
188 | v9ses->uname = match_strdup(&args[0]); | ||
189 | if (!v9ses->uname) { | ||
190 | ret = -ENOMEM; | ||
191 | goto free_and_return; | ||
192 | } | ||
188 | break; | 193 | break; |
189 | case Opt_remotename: | 194 | case Opt_remotename: |
190 | match_strlcpy(v9ses->aname, &args[0], PATH_MAX); | 195 | kfree(v9ses->aname); |
196 | v9ses->aname = match_strdup(&args[0]); | ||
197 | if (!v9ses->aname) { | ||
198 | ret = -ENOMEM; | ||
199 | goto free_and_return; | ||
200 | } | ||
191 | break; | 201 | break; |
192 | case Opt_nodevmap: | 202 | case Opt_nodevmap: |
193 | v9ses->nodev = 1; | 203 | v9ses->nodev = 1; |
@@ -287,21 +297,21 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
287 | struct p9_fid *fid; | 297 | struct p9_fid *fid; |
288 | int rc; | 298 | int rc; |
289 | 299 | ||
290 | v9ses->uname = __getname(); | 300 | v9ses->uname = kstrdup(V9FS_DEFUSER, GFP_KERNEL); |
291 | if (!v9ses->uname) | 301 | if (!v9ses->uname) |
292 | return ERR_PTR(-ENOMEM); | 302 | return ERR_PTR(-ENOMEM); |
293 | 303 | ||
294 | v9ses->aname = __getname(); | 304 | v9ses->aname = kstrdup(V9FS_DEFANAME, GFP_KERNEL); |
295 | if (!v9ses->aname) { | 305 | if (!v9ses->aname) { |
296 | __putname(v9ses->uname); | 306 | kfree(v9ses->uname); |
297 | return ERR_PTR(-ENOMEM); | 307 | return ERR_PTR(-ENOMEM); |
298 | } | 308 | } |
299 | init_rwsem(&v9ses->rename_sem); | 309 | init_rwsem(&v9ses->rename_sem); |
300 | 310 | ||
301 | rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY); | 311 | rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY); |
302 | if (rc) { | 312 | if (rc) { |
303 | __putname(v9ses->aname); | 313 | kfree(v9ses->aname); |
304 | __putname(v9ses->uname); | 314 | kfree(v9ses->uname); |
305 | return ERR_PTR(rc); | 315 | return ERR_PTR(rc); |
306 | } | 316 | } |
307 | 317 | ||
@@ -309,8 +319,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
309 | list_add(&v9ses->slist, &v9fs_sessionlist); | 319 | list_add(&v9ses->slist, &v9fs_sessionlist); |
310 | spin_unlock(&v9fs_sessionlist_lock); | 320 | spin_unlock(&v9fs_sessionlist_lock); |
311 | 321 | ||
312 | strcpy(v9ses->uname, V9FS_DEFUSER); | ||
313 | strcpy(v9ses->aname, V9FS_DEFANAME); | ||
314 | v9ses->uid = ~0; | 322 | v9ses->uid = ~0; |
315 | v9ses->dfltuid = V9FS_DEFUID; | 323 | v9ses->dfltuid = V9FS_DEFUID; |
316 | v9ses->dfltgid = V9FS_DEFGID; | 324 | v9ses->dfltgid = V9FS_DEFGID; |
@@ -412,8 +420,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) | |||
412 | kfree(v9ses->cachetag); | 420 | kfree(v9ses->cachetag); |
413 | } | 421 | } |
414 | #endif | 422 | #endif |
415 | __putname(v9ses->uname); | 423 | kfree(v9ses->uname); |
416 | __putname(v9ses->aname); | 424 | kfree(v9ses->aname); |
417 | 425 | ||
418 | bdi_destroy(&v9ses->bdi); | 426 | bdi_destroy(&v9ses->bdi); |
419 | 427 | ||
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index cbf9dbb1b2a2..890bed538f9b 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -1276,12 +1276,12 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) | |||
1276 | } | 1276 | } |
1277 | 1277 | ||
1278 | /* copy extension buffer into buffer */ | 1278 | /* copy extension buffer into buffer */ |
1279 | strncpy(buffer, st->extension, buflen); | 1279 | retval = min(strlen(st->extension)+1, (size_t)buflen); |
1280 | memcpy(buffer, st->extension, retval); | ||
1280 | 1281 | ||
1281 | p9_debug(P9_DEBUG_VFS, "%s -> %s (%s)\n", | 1282 | p9_debug(P9_DEBUG_VFS, "%s -> %s (%.*s)\n", |
1282 | dentry->d_name.name, st->extension, buffer); | 1283 | dentry->d_name.name, st->extension, buflen, buffer); |
1283 | 1284 | ||
1284 | retval = strnlen(buffer, buflen); | ||
1285 | done: | 1285 | done: |
1286 | p9stat_free(st); | 1286 | p9stat_free(st); |
1287 | kfree(st); | 1287 | kfree(st); |
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index e7396cfdb109..91b11650722e 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -392,10 +392,12 @@ static struct vfsmount *autofs4_d_automount(struct path *path) | |||
392 | ino->flags |= AUTOFS_INF_PENDING; | 392 | ino->flags |= AUTOFS_INF_PENDING; |
393 | spin_unlock(&sbi->fs_lock); | 393 | spin_unlock(&sbi->fs_lock); |
394 | status = autofs4_mount_wait(dentry); | 394 | status = autofs4_mount_wait(dentry); |
395 | if (status) | ||
396 | return ERR_PTR(status); | ||
397 | spin_lock(&sbi->fs_lock); | 395 | spin_lock(&sbi->fs_lock); |
398 | ino->flags &= ~AUTOFS_INF_PENDING; | 396 | ino->flags &= ~AUTOFS_INF_PENDING; |
397 | if (status) { | ||
398 | spin_unlock(&sbi->fs_lock); | ||
399 | return ERR_PTR(status); | ||
400 | } | ||
399 | } | 401 | } |
400 | done: | 402 | done: |
401 | if (!(ino->flags & AUTOFS_INF_EXPIRING)) { | 403 | if (!(ino->flags & AUTOFS_INF_EXPIRING)) { |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index e800dec958c3..fbd9f60bd763 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -36,7 +36,6 @@ | |||
36 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
37 | #include <asm/param.h> | 37 | #include <asm/param.h> |
38 | #include <asm/page.h> | 38 | #include <asm/page.h> |
39 | #include <asm/exec.h> | ||
40 | 39 | ||
41 | #ifndef user_long_t | 40 | #ifndef user_long_t |
42 | #define user_long_t long | 41 | #define user_long_t long |
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 262db114ff01..a46049154107 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c | |||
@@ -39,7 +39,6 @@ | |||
39 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
40 | #include <asm/param.h> | 40 | #include <asm/param.h> |
41 | #include <asm/pgalloc.h> | 41 | #include <asm/pgalloc.h> |
42 | #include <asm/exec.h> | ||
43 | 42 | ||
44 | typedef char *elf_caddr_t; | 43 | typedef char *elf_caddr_t; |
45 | 44 | ||
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index e85c04b9f61c..a3f28f331b2b 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c | |||
@@ -70,23 +70,25 @@ static inline int use_bip_pool(unsigned int idx) | |||
70 | } | 70 | } |
71 | 71 | ||
72 | /** | 72 | /** |
73 | * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio | 73 | * bio_integrity_alloc - Allocate integrity payload and attach it to bio |
74 | * @bio: bio to attach integrity metadata to | 74 | * @bio: bio to attach integrity metadata to |
75 | * @gfp_mask: Memory allocation mask | 75 | * @gfp_mask: Memory allocation mask |
76 | * @nr_vecs: Number of integrity metadata scatter-gather elements | 76 | * @nr_vecs: Number of integrity metadata scatter-gather elements |
77 | * @bs: bio_set to allocate from | ||
78 | * | 77 | * |
79 | * Description: This function prepares a bio for attaching integrity | 78 | * Description: This function prepares a bio for attaching integrity |
80 | * metadata. nr_vecs specifies the maximum number of pages containing | 79 | * metadata. nr_vecs specifies the maximum number of pages containing |
81 | * integrity metadata that can be attached. | 80 | * integrity metadata that can be attached. |
82 | */ | 81 | */ |
83 | struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, | 82 | struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, |
84 | gfp_t gfp_mask, | 83 | gfp_t gfp_mask, |
85 | unsigned int nr_vecs, | 84 | unsigned int nr_vecs) |
86 | struct bio_set *bs) | ||
87 | { | 85 | { |
88 | struct bio_integrity_payload *bip; | 86 | struct bio_integrity_payload *bip; |
89 | unsigned int idx = vecs_to_idx(nr_vecs); | 87 | unsigned int idx = vecs_to_idx(nr_vecs); |
88 | struct bio_set *bs = bio->bi_pool; | ||
89 | |||
90 | if (!bs) | ||
91 | bs = fs_bio_set; | ||
90 | 92 | ||
91 | BUG_ON(bio == NULL); | 93 | BUG_ON(bio == NULL); |
92 | bip = NULL; | 94 | bip = NULL; |
@@ -114,37 +116,22 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, | |||
114 | 116 | ||
115 | return bip; | 117 | return bip; |
116 | } | 118 | } |
117 | EXPORT_SYMBOL(bio_integrity_alloc_bioset); | ||
118 | |||
119 | /** | ||
120 | * bio_integrity_alloc - Allocate integrity payload and attach it to bio | ||
121 | * @bio: bio to attach integrity metadata to | ||
122 | * @gfp_mask: Memory allocation mask | ||
123 | * @nr_vecs: Number of integrity metadata scatter-gather elements | ||
124 | * | ||
125 | * Description: This function prepares a bio for attaching integrity | ||
126 | * metadata. nr_vecs specifies the maximum number of pages containing | ||
127 | * integrity metadata that can be attached. | ||
128 | */ | ||
129 | struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, | ||
130 | gfp_t gfp_mask, | ||
131 | unsigned int nr_vecs) | ||
132 | { | ||
133 | return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set); | ||
134 | } | ||
135 | EXPORT_SYMBOL(bio_integrity_alloc); | 119 | EXPORT_SYMBOL(bio_integrity_alloc); |
136 | 120 | ||
137 | /** | 121 | /** |
138 | * bio_integrity_free - Free bio integrity payload | 122 | * bio_integrity_free - Free bio integrity payload |
139 | * @bio: bio containing bip to be freed | 123 | * @bio: bio containing bip to be freed |
140 | * @bs: bio_set this bio was allocated from | ||
141 | * | 124 | * |
142 | * Description: Used to free the integrity portion of a bio. Usually | 125 | * Description: Used to free the integrity portion of a bio. Usually |
143 | * called from bio_free(). | 126 | * called from bio_free(). |
144 | */ | 127 | */ |
145 | void bio_integrity_free(struct bio *bio, struct bio_set *bs) | 128 | void bio_integrity_free(struct bio *bio) |
146 | { | 129 | { |
147 | struct bio_integrity_payload *bip = bio->bi_integrity; | 130 | struct bio_integrity_payload *bip = bio->bi_integrity; |
131 | struct bio_set *bs = bio->bi_pool; | ||
132 | |||
133 | if (!bs) | ||
134 | bs = fs_bio_set; | ||
148 | 135 | ||
149 | BUG_ON(bip == NULL); | 136 | BUG_ON(bip == NULL); |
150 | 137 | ||
@@ -730,19 +717,18 @@ EXPORT_SYMBOL(bio_integrity_split); | |||
730 | * @bio: New bio | 717 | * @bio: New bio |
731 | * @bio_src: Original bio | 718 | * @bio_src: Original bio |
732 | * @gfp_mask: Memory allocation mask | 719 | * @gfp_mask: Memory allocation mask |
733 | * @bs: bio_set to allocate bip from | ||
734 | * | 720 | * |
735 | * Description: Called to allocate a bip when cloning a bio | 721 | * Description: Called to allocate a bip when cloning a bio |
736 | */ | 722 | */ |
737 | int bio_integrity_clone(struct bio *bio, struct bio *bio_src, | 723 | int bio_integrity_clone(struct bio *bio, struct bio *bio_src, |
738 | gfp_t gfp_mask, struct bio_set *bs) | 724 | gfp_t gfp_mask) |
739 | { | 725 | { |
740 | struct bio_integrity_payload *bip_src = bio_src->bi_integrity; | 726 | struct bio_integrity_payload *bip_src = bio_src->bi_integrity; |
741 | struct bio_integrity_payload *bip; | 727 | struct bio_integrity_payload *bip; |
742 | 728 | ||
743 | BUG_ON(bip_src == NULL); | 729 | BUG_ON(bip_src == NULL); |
744 | 730 | ||
745 | bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs); | 731 | bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt); |
746 | 732 | ||
747 | if (bip == NULL) | 733 | if (bip == NULL) |
748 | return -EIO; | 734 | return -EIO; |
@@ -55,6 +55,7 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { | |||
55 | * IO code that does not need private memory pools. | 55 | * IO code that does not need private memory pools. |
56 | */ | 56 | */ |
57 | struct bio_set *fs_bio_set; | 57 | struct bio_set *fs_bio_set; |
58 | EXPORT_SYMBOL(fs_bio_set); | ||
58 | 59 | ||
59 | /* | 60 | /* |
60 | * Our slab pool management | 61 | * Our slab pool management |
@@ -233,26 +234,37 @@ fallback: | |||
233 | return bvl; | 234 | return bvl; |
234 | } | 235 | } |
235 | 236 | ||
236 | void bio_free(struct bio *bio, struct bio_set *bs) | 237 | static void __bio_free(struct bio *bio) |
237 | { | 238 | { |
239 | bio_disassociate_task(bio); | ||
240 | |||
241 | if (bio_integrity(bio)) | ||
242 | bio_integrity_free(bio); | ||
243 | } | ||
244 | |||
245 | static void bio_free(struct bio *bio) | ||
246 | { | ||
247 | struct bio_set *bs = bio->bi_pool; | ||
238 | void *p; | 248 | void *p; |
239 | 249 | ||
240 | if (bio_has_allocated_vec(bio)) | 250 | __bio_free(bio); |
241 | bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); | ||
242 | 251 | ||
243 | if (bio_integrity(bio)) | 252 | if (bs) { |
244 | bio_integrity_free(bio, bs); | 253 | if (bio_has_allocated_vec(bio)) |
254 | bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); | ||
245 | 255 | ||
246 | /* | 256 | /* |
247 | * If we have front padding, adjust the bio pointer before freeing | 257 | * If we have front padding, adjust the bio pointer before freeing |
248 | */ | 258 | */ |
249 | p = bio; | 259 | p = bio; |
250 | if (bs->front_pad) | ||
251 | p -= bs->front_pad; | 260 | p -= bs->front_pad; |
252 | 261 | ||
253 | mempool_free(p, bs->bio_pool); | 262 | mempool_free(p, bs->bio_pool); |
263 | } else { | ||
264 | /* Bio was allocated by bio_kmalloc() */ | ||
265 | kfree(bio); | ||
266 | } | ||
254 | } | 267 | } |
255 | EXPORT_SYMBOL(bio_free); | ||
256 | 268 | ||
257 | void bio_init(struct bio *bio) | 269 | void bio_init(struct bio *bio) |
258 | { | 270 | { |
@@ -263,48 +275,85 @@ void bio_init(struct bio *bio) | |||
263 | EXPORT_SYMBOL(bio_init); | 275 | EXPORT_SYMBOL(bio_init); |
264 | 276 | ||
265 | /** | 277 | /** |
278 | * bio_reset - reinitialize a bio | ||
279 | * @bio: bio to reset | ||
280 | * | ||
281 | * Description: | ||
282 | * After calling bio_reset(), @bio will be in the same state as a freshly | ||
283 | * allocated bio returned bio bio_alloc_bioset() - the only fields that are | ||
284 | * preserved are the ones that are initialized by bio_alloc_bioset(). See | ||
285 | * comment in struct bio. | ||
286 | */ | ||
287 | void bio_reset(struct bio *bio) | ||
288 | { | ||
289 | unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS); | ||
290 | |||
291 | __bio_free(bio); | ||
292 | |||
293 | memset(bio, 0, BIO_RESET_BYTES); | ||
294 | bio->bi_flags = flags|(1 << BIO_UPTODATE); | ||
295 | } | ||
296 | EXPORT_SYMBOL(bio_reset); | ||
297 | |||
298 | /** | ||
266 | * bio_alloc_bioset - allocate a bio for I/O | 299 | * bio_alloc_bioset - allocate a bio for I/O |
267 | * @gfp_mask: the GFP_ mask given to the slab allocator | 300 | * @gfp_mask: the GFP_ mask given to the slab allocator |
268 | * @nr_iovecs: number of iovecs to pre-allocate | 301 | * @nr_iovecs: number of iovecs to pre-allocate |
269 | * @bs: the bio_set to allocate from. | 302 | * @bs: the bio_set to allocate from. |
270 | * | 303 | * |
271 | * Description: | 304 | * Description: |
272 | * bio_alloc_bioset will try its own mempool to satisfy the allocation. | 305 | * If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is |
273 | * If %__GFP_WAIT is set then we will block on the internal pool waiting | 306 | * backed by the @bs's mempool. |
274 | * for a &struct bio to become free. | ||
275 | * | 307 | * |
276 | * Note that the caller must set ->bi_destructor on successful return | 308 | * When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be |
277 | * of a bio, to do the appropriate freeing of the bio once the reference | 309 | * able to allocate a bio. This is due to the mempool guarantees. To make this |
278 | * count drops to zero. | 310 | * work, callers must never allocate more than 1 bio at a time from this pool. |
279 | **/ | 311 | * Callers that need to allocate more than 1 bio must always submit the |
312 | * previously allocated bio for IO before attempting to allocate a new one. | ||
313 | * Failure to do so can cause deadlocks under memory pressure. | ||
314 | * | ||
315 | * RETURNS: | ||
316 | * Pointer to new bio on success, NULL on failure. | ||
317 | */ | ||
280 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | 318 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) |
281 | { | 319 | { |
320 | unsigned front_pad; | ||
321 | unsigned inline_vecs; | ||
282 | unsigned long idx = BIO_POOL_NONE; | 322 | unsigned long idx = BIO_POOL_NONE; |
283 | struct bio_vec *bvl = NULL; | 323 | struct bio_vec *bvl = NULL; |
284 | struct bio *bio; | 324 | struct bio *bio; |
285 | void *p; | 325 | void *p; |
286 | 326 | ||
287 | p = mempool_alloc(bs->bio_pool, gfp_mask); | 327 | if (!bs) { |
328 | if (nr_iovecs > UIO_MAXIOV) | ||
329 | return NULL; | ||
330 | |||
331 | p = kmalloc(sizeof(struct bio) + | ||
332 | nr_iovecs * sizeof(struct bio_vec), | ||
333 | gfp_mask); | ||
334 | front_pad = 0; | ||
335 | inline_vecs = nr_iovecs; | ||
336 | } else { | ||
337 | p = mempool_alloc(bs->bio_pool, gfp_mask); | ||
338 | front_pad = bs->front_pad; | ||
339 | inline_vecs = BIO_INLINE_VECS; | ||
340 | } | ||
341 | |||
288 | if (unlikely(!p)) | 342 | if (unlikely(!p)) |
289 | return NULL; | 343 | return NULL; |
290 | bio = p + bs->front_pad; | ||
291 | 344 | ||
345 | bio = p + front_pad; | ||
292 | bio_init(bio); | 346 | bio_init(bio); |
293 | 347 | ||
294 | if (unlikely(!nr_iovecs)) | 348 | if (nr_iovecs > inline_vecs) { |
295 | goto out_set; | ||
296 | |||
297 | if (nr_iovecs <= BIO_INLINE_VECS) { | ||
298 | bvl = bio->bi_inline_vecs; | ||
299 | nr_iovecs = BIO_INLINE_VECS; | ||
300 | } else { | ||
301 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); | 349 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); |
302 | if (unlikely(!bvl)) | 350 | if (unlikely(!bvl)) |
303 | goto err_free; | 351 | goto err_free; |
304 | 352 | } else if (nr_iovecs) { | |
305 | nr_iovecs = bvec_nr_vecs(idx); | 353 | bvl = bio->bi_inline_vecs; |
306 | } | 354 | } |
307 | out_set: | 355 | |
356 | bio->bi_pool = bs; | ||
308 | bio->bi_flags |= idx << BIO_POOL_OFFSET; | 357 | bio->bi_flags |= idx << BIO_POOL_OFFSET; |
309 | bio->bi_max_vecs = nr_iovecs; | 358 | bio->bi_max_vecs = nr_iovecs; |
310 | bio->bi_io_vec = bvl; | 359 | bio->bi_io_vec = bvl; |
@@ -316,80 +365,6 @@ err_free: | |||
316 | } | 365 | } |
317 | EXPORT_SYMBOL(bio_alloc_bioset); | 366 | EXPORT_SYMBOL(bio_alloc_bioset); |
318 | 367 | ||
319 | static void bio_fs_destructor(struct bio *bio) | ||
320 | { | ||
321 | bio_free(bio, fs_bio_set); | ||
322 | } | ||
323 | |||
324 | /** | ||
325 | * bio_alloc - allocate a new bio, memory pool backed | ||
326 | * @gfp_mask: allocation mask to use | ||
327 | * @nr_iovecs: number of iovecs | ||
328 | * | ||
329 | * bio_alloc will allocate a bio and associated bio_vec array that can hold | ||
330 | * at least @nr_iovecs entries. Allocations will be done from the | ||
331 | * fs_bio_set. Also see @bio_alloc_bioset and @bio_kmalloc. | ||
332 | * | ||
333 | * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate | ||
334 | * a bio. This is due to the mempool guarantees. To make this work, callers | ||
335 | * must never allocate more than 1 bio at a time from this pool. Callers | ||
336 | * that need to allocate more than 1 bio must always submit the previously | ||
337 | * allocated bio for IO before attempting to allocate a new one. Failure to | ||
338 | * do so can cause livelocks under memory pressure. | ||
339 | * | ||
340 | * RETURNS: | ||
341 | * Pointer to new bio on success, NULL on failure. | ||
342 | */ | ||
343 | struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) | ||
344 | { | ||
345 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); | ||
346 | |||
347 | if (bio) | ||
348 | bio->bi_destructor = bio_fs_destructor; | ||
349 | |||
350 | return bio; | ||
351 | } | ||
352 | EXPORT_SYMBOL(bio_alloc); | ||
353 | |||
354 | static void bio_kmalloc_destructor(struct bio *bio) | ||
355 | { | ||
356 | if (bio_integrity(bio)) | ||
357 | bio_integrity_free(bio, fs_bio_set); | ||
358 | kfree(bio); | ||
359 | } | ||
360 | |||
361 | /** | ||
362 | * bio_kmalloc - allocate a bio for I/O using kmalloc() | ||
363 | * @gfp_mask: the GFP_ mask given to the slab allocator | ||
364 | * @nr_iovecs: number of iovecs to pre-allocate | ||
365 | * | ||
366 | * Description: | ||
367 | * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask contains | ||
368 | * %__GFP_WAIT, the allocation is guaranteed to succeed. | ||
369 | * | ||
370 | **/ | ||
371 | struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) | ||
372 | { | ||
373 | struct bio *bio; | ||
374 | |||
375 | if (nr_iovecs > UIO_MAXIOV) | ||
376 | return NULL; | ||
377 | |||
378 | bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec), | ||
379 | gfp_mask); | ||
380 | if (unlikely(!bio)) | ||
381 | return NULL; | ||
382 | |||
383 | bio_init(bio); | ||
384 | bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET; | ||
385 | bio->bi_max_vecs = nr_iovecs; | ||
386 | bio->bi_io_vec = bio->bi_inline_vecs; | ||
387 | bio->bi_destructor = bio_kmalloc_destructor; | ||
388 | |||
389 | return bio; | ||
390 | } | ||
391 | EXPORT_SYMBOL(bio_kmalloc); | ||
392 | |||
393 | void zero_fill_bio(struct bio *bio) | 368 | void zero_fill_bio(struct bio *bio) |
394 | { | 369 | { |
395 | unsigned long flags; | 370 | unsigned long flags; |
@@ -420,11 +395,8 @@ void bio_put(struct bio *bio) | |||
420 | /* | 395 | /* |
421 | * last put frees it | 396 | * last put frees it |
422 | */ | 397 | */ |
423 | if (atomic_dec_and_test(&bio->bi_cnt)) { | 398 | if (atomic_dec_and_test(&bio->bi_cnt)) |
424 | bio_disassociate_task(bio); | 399 | bio_free(bio); |
425 | bio->bi_next = NULL; | ||
426 | bio->bi_destructor(bio); | ||
427 | } | ||
428 | } | 400 | } |
429 | EXPORT_SYMBOL(bio_put); | 401 | EXPORT_SYMBOL(bio_put); |
430 | 402 | ||
@@ -466,26 +438,28 @@ void __bio_clone(struct bio *bio, struct bio *bio_src) | |||
466 | EXPORT_SYMBOL(__bio_clone); | 438 | EXPORT_SYMBOL(__bio_clone); |
467 | 439 | ||
468 | /** | 440 | /** |
469 | * bio_clone - clone a bio | 441 | * bio_clone_bioset - clone a bio |
470 | * @bio: bio to clone | 442 | * @bio: bio to clone |
471 | * @gfp_mask: allocation priority | 443 | * @gfp_mask: allocation priority |
444 | * @bs: bio_set to allocate from | ||
472 | * | 445 | * |
473 | * Like __bio_clone, only also allocates the returned bio | 446 | * Like __bio_clone, only also allocates the returned bio |
474 | */ | 447 | */ |
475 | struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) | 448 | struct bio *bio_clone_bioset(struct bio *bio, gfp_t gfp_mask, |
449 | struct bio_set *bs) | ||
476 | { | 450 | { |
477 | struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); | 451 | struct bio *b; |
478 | 452 | ||
453 | b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, bs); | ||
479 | if (!b) | 454 | if (!b) |
480 | return NULL; | 455 | return NULL; |
481 | 456 | ||
482 | b->bi_destructor = bio_fs_destructor; | ||
483 | __bio_clone(b, bio); | 457 | __bio_clone(b, bio); |
484 | 458 | ||
485 | if (bio_integrity(bio)) { | 459 | if (bio_integrity(bio)) { |
486 | int ret; | 460 | int ret; |
487 | 461 | ||
488 | ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set); | 462 | ret = bio_integrity_clone(b, bio, gfp_mask); |
489 | 463 | ||
490 | if (ret < 0) { | 464 | if (ret < 0) { |
491 | bio_put(b); | 465 | bio_put(b); |
@@ -495,7 +469,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) | |||
495 | 469 | ||
496 | return b; | 470 | return b; |
497 | } | 471 | } |
498 | EXPORT_SYMBOL(bio_clone); | 472 | EXPORT_SYMBOL(bio_clone_bioset); |
499 | 473 | ||
500 | /** | 474 | /** |
501 | * bio_get_nr_vecs - return approx number of vecs | 475 | * bio_get_nr_vecs - return approx number of vecs |
@@ -1501,7 +1475,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) | |||
1501 | trace_block_split(bdev_get_queue(bi->bi_bdev), bi, | 1475 | trace_block_split(bdev_get_queue(bi->bi_bdev), bi, |
1502 | bi->bi_sector + first_sectors); | 1476 | bi->bi_sector + first_sectors); |
1503 | 1477 | ||
1504 | BUG_ON(bi->bi_vcnt != 1); | 1478 | BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0); |
1505 | BUG_ON(bi->bi_idx != 0); | 1479 | BUG_ON(bi->bi_idx != 0); |
1506 | atomic_set(&bp->cnt, 3); | 1480 | atomic_set(&bp->cnt, 3); |
1507 | bp->error = 0; | 1481 | bp->error = 0; |
@@ -1511,17 +1485,22 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) | |||
1511 | bp->bio2.bi_size -= first_sectors << 9; | 1485 | bp->bio2.bi_size -= first_sectors << 9; |
1512 | bp->bio1.bi_size = first_sectors << 9; | 1486 | bp->bio1.bi_size = first_sectors << 9; |
1513 | 1487 | ||
1514 | bp->bv1 = bi->bi_io_vec[0]; | 1488 | if (bi->bi_vcnt != 0) { |
1515 | bp->bv2 = bi->bi_io_vec[0]; | 1489 | bp->bv1 = bi->bi_io_vec[0]; |
1516 | bp->bv2.bv_offset += first_sectors << 9; | 1490 | bp->bv2 = bi->bi_io_vec[0]; |
1517 | bp->bv2.bv_len -= first_sectors << 9; | 1491 | |
1518 | bp->bv1.bv_len = first_sectors << 9; | 1492 | if (bio_is_rw(bi)) { |
1493 | bp->bv2.bv_offset += first_sectors << 9; | ||
1494 | bp->bv2.bv_len -= first_sectors << 9; | ||
1495 | bp->bv1.bv_len = first_sectors << 9; | ||
1496 | } | ||
1519 | 1497 | ||
1520 | bp->bio1.bi_io_vec = &bp->bv1; | 1498 | bp->bio1.bi_io_vec = &bp->bv1; |
1521 | bp->bio2.bi_io_vec = &bp->bv2; | 1499 | bp->bio2.bi_io_vec = &bp->bv2; |
1522 | 1500 | ||
1523 | bp->bio1.bi_max_vecs = 1; | 1501 | bp->bio1.bi_max_vecs = 1; |
1524 | bp->bio2.bi_max_vecs = 1; | 1502 | bp->bio2.bi_max_vecs = 1; |
1503 | } | ||
1525 | 1504 | ||
1526 | bp->bio1.bi_end_io = bio_pair_end_1; | 1505 | bp->bio1.bi_end_io = bio_pair_end_1; |
1527 | bp->bio2.bi_end_io = bio_pair_end_2; | 1506 | bp->bio2.bi_end_io = bio_pair_end_2; |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 38e721b35d45..b3c1d3dae77d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -116,6 +116,8 @@ EXPORT_SYMBOL(invalidate_bdev); | |||
116 | 116 | ||
117 | int set_blocksize(struct block_device *bdev, int size) | 117 | int set_blocksize(struct block_device *bdev, int size) |
118 | { | 118 | { |
119 | struct address_space *mapping; | ||
120 | |||
119 | /* Size must be a power of two, and between 512 and PAGE_SIZE */ | 121 | /* Size must be a power of two, and between 512 and PAGE_SIZE */ |
120 | if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) | 122 | if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) |
121 | return -EINVAL; | 123 | return -EINVAL; |
@@ -124,6 +126,19 @@ int set_blocksize(struct block_device *bdev, int size) | |||
124 | if (size < bdev_logical_block_size(bdev)) | 126 | if (size < bdev_logical_block_size(bdev)) |
125 | return -EINVAL; | 127 | return -EINVAL; |
126 | 128 | ||
129 | /* Prevent starting I/O or mapping the device */ | ||
130 | percpu_down_write(&bdev->bd_block_size_semaphore); | ||
131 | |||
132 | /* Check that the block device is not memory mapped */ | ||
133 | mapping = bdev->bd_inode->i_mapping; | ||
134 | mutex_lock(&mapping->i_mmap_mutex); | ||
135 | if (mapping_mapped(mapping)) { | ||
136 | mutex_unlock(&mapping->i_mmap_mutex); | ||
137 | percpu_up_write(&bdev->bd_block_size_semaphore); | ||
138 | return -EBUSY; | ||
139 | } | ||
140 | mutex_unlock(&mapping->i_mmap_mutex); | ||
141 | |||
127 | /* Don't change the size if it is same as current */ | 142 | /* Don't change the size if it is same as current */ |
128 | if (bdev->bd_block_size != size) { | 143 | if (bdev->bd_block_size != size) { |
129 | sync_blockdev(bdev); | 144 | sync_blockdev(bdev); |
@@ -131,6 +146,9 @@ int set_blocksize(struct block_device *bdev, int size) | |||
131 | bdev->bd_inode->i_blkbits = blksize_bits(size); | 146 | bdev->bd_inode->i_blkbits = blksize_bits(size); |
132 | kill_bdev(bdev); | 147 | kill_bdev(bdev); |
133 | } | 148 | } |
149 | |||
150 | percpu_up_write(&bdev->bd_block_size_semaphore); | ||
151 | |||
134 | return 0; | 152 | return 0; |
135 | } | 153 | } |
136 | 154 | ||
@@ -441,6 +459,12 @@ static struct inode *bdev_alloc_inode(struct super_block *sb) | |||
441 | struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); | 459 | struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); |
442 | if (!ei) | 460 | if (!ei) |
443 | return NULL; | 461 | return NULL; |
462 | |||
463 | if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) { | ||
464 | kmem_cache_free(bdev_cachep, ei); | ||
465 | return NULL; | ||
466 | } | ||
467 | |||
444 | return &ei->vfs_inode; | 468 | return &ei->vfs_inode; |
445 | } | 469 | } |
446 | 470 | ||
@@ -449,6 +473,8 @@ static void bdev_i_callback(struct rcu_head *head) | |||
449 | struct inode *inode = container_of(head, struct inode, i_rcu); | 473 | struct inode *inode = container_of(head, struct inode, i_rcu); |
450 | struct bdev_inode *bdi = BDEV_I(inode); | 474 | struct bdev_inode *bdi = BDEV_I(inode); |
451 | 475 | ||
476 | percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore); | ||
477 | |||
452 | kmem_cache_free(bdev_cachep, bdi); | 478 | kmem_cache_free(bdev_cachep, bdi); |
453 | } | 479 | } |
454 | 480 | ||
@@ -1567,6 +1593,22 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
1567 | return blkdev_ioctl(bdev, mode, cmd, arg); | 1593 | return blkdev_ioctl(bdev, mode, cmd, arg); |
1568 | } | 1594 | } |
1569 | 1595 | ||
1596 | ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, | ||
1597 | unsigned long nr_segs, loff_t pos) | ||
1598 | { | ||
1599 | ssize_t ret; | ||
1600 | struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); | ||
1601 | |||
1602 | percpu_down_read(&bdev->bd_block_size_semaphore); | ||
1603 | |||
1604 | ret = generic_file_aio_read(iocb, iov, nr_segs, pos); | ||
1605 | |||
1606 | percpu_up_read(&bdev->bd_block_size_semaphore); | ||
1607 | |||
1608 | return ret; | ||
1609 | } | ||
1610 | EXPORT_SYMBOL_GPL(blkdev_aio_read); | ||
1611 | |||
1570 | /* | 1612 | /* |
1571 | * Write data to the block device. Only intended for the block device itself | 1613 | * Write data to the block device. Only intended for the block device itself |
1572 | * and the raw driver which basically is a fake block device. | 1614 | * and the raw driver which basically is a fake block device. |
@@ -1578,12 +1620,16 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
1578 | unsigned long nr_segs, loff_t pos) | 1620 | unsigned long nr_segs, loff_t pos) |
1579 | { | 1621 | { |
1580 | struct file *file = iocb->ki_filp; | 1622 | struct file *file = iocb->ki_filp; |
1623 | struct block_device *bdev = I_BDEV(file->f_mapping->host); | ||
1581 | struct blk_plug plug; | 1624 | struct blk_plug plug; |
1582 | ssize_t ret; | 1625 | ssize_t ret; |
1583 | 1626 | ||
1584 | BUG_ON(iocb->ki_pos != pos); | 1627 | BUG_ON(iocb->ki_pos != pos); |
1585 | 1628 | ||
1586 | blk_start_plug(&plug); | 1629 | blk_start_plug(&plug); |
1630 | |||
1631 | percpu_down_read(&bdev->bd_block_size_semaphore); | ||
1632 | |||
1587 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); | 1633 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); |
1588 | if (ret > 0 || ret == -EIOCBQUEUED) { | 1634 | if (ret > 0 || ret == -EIOCBQUEUED) { |
1589 | ssize_t err; | 1635 | ssize_t err; |
@@ -1592,11 +1638,29 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
1592 | if (err < 0 && ret > 0) | 1638 | if (err < 0 && ret > 0) |
1593 | ret = err; | 1639 | ret = err; |
1594 | } | 1640 | } |
1641 | |||
1642 | percpu_up_read(&bdev->bd_block_size_semaphore); | ||
1643 | |||
1595 | blk_finish_plug(&plug); | 1644 | blk_finish_plug(&plug); |
1645 | |||
1596 | return ret; | 1646 | return ret; |
1597 | } | 1647 | } |
1598 | EXPORT_SYMBOL_GPL(blkdev_aio_write); | 1648 | EXPORT_SYMBOL_GPL(blkdev_aio_write); |
1599 | 1649 | ||
1650 | static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) | ||
1651 | { | ||
1652 | int ret; | ||
1653 | struct block_device *bdev = I_BDEV(file->f_mapping->host); | ||
1654 | |||
1655 | percpu_down_read(&bdev->bd_block_size_semaphore); | ||
1656 | |||
1657 | ret = generic_file_mmap(file, vma); | ||
1658 | |||
1659 | percpu_up_read(&bdev->bd_block_size_semaphore); | ||
1660 | |||
1661 | return ret; | ||
1662 | } | ||
1663 | |||
1600 | /* | 1664 | /* |
1601 | * Try to release a page associated with block device when the system | 1665 | * Try to release a page associated with block device when the system |
1602 | * is under memory pressure. | 1666 | * is under memory pressure. |
@@ -1627,9 +1691,9 @@ const struct file_operations def_blk_fops = { | |||
1627 | .llseek = block_llseek, | 1691 | .llseek = block_llseek, |
1628 | .read = do_sync_read, | 1692 | .read = do_sync_read, |
1629 | .write = do_sync_write, | 1693 | .write = do_sync_write, |
1630 | .aio_read = generic_file_aio_read, | 1694 | .aio_read = blkdev_aio_read, |
1631 | .aio_write = blkdev_aio_write, | 1695 | .aio_write = blkdev_aio_write, |
1632 | .mmap = generic_file_mmap, | 1696 | .mmap = blkdev_mmap, |
1633 | .fsync = blkdev_fsync, | 1697 | .fsync = blkdev_fsync, |
1634 | .unlocked_ioctl = block_ioctl, | 1698 | .unlocked_ioctl = block_ioctl, |
1635 | #ifdef CONFIG_COMPAT | 1699 | #ifdef CONFIG_COMPAT |
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index ff6475f409d6..f3187938e081 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -16,6 +16,7 @@ | |||
16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/vmalloc.h> | ||
19 | #include "ctree.h" | 20 | #include "ctree.h" |
20 | #include "disk-io.h" | 21 | #include "disk-io.h" |
21 | #include "backref.h" | 22 | #include "backref.h" |
@@ -231,7 +232,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | |||
231 | } | 232 | } |
232 | if (!ret) { | 233 | if (!ret) { |
233 | ret = ulist_add(parents, eb->start, | 234 | ret = ulist_add(parents, eb->start, |
234 | (unsigned long)eie, GFP_NOFS); | 235 | (uintptr_t)eie, GFP_NOFS); |
235 | if (ret < 0) | 236 | if (ret < 0) |
236 | break; | 237 | break; |
237 | if (!extent_item_pos) { | 238 | if (!extent_item_pos) { |
@@ -363,8 +364,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
363 | ULIST_ITER_INIT(&uiter); | 364 | ULIST_ITER_INIT(&uiter); |
364 | node = ulist_next(parents, &uiter); | 365 | node = ulist_next(parents, &uiter); |
365 | ref->parent = node ? node->val : 0; | 366 | ref->parent = node ? node->val : 0; |
366 | ref->inode_list = | 367 | ref->inode_list = node ? |
367 | node ? (struct extent_inode_elem *)node->aux : 0; | 368 | (struct extent_inode_elem *)(uintptr_t)node->aux : 0; |
368 | 369 | ||
369 | /* additional parents require new refs being added here */ | 370 | /* additional parents require new refs being added here */ |
370 | while ((node = ulist_next(parents, &uiter))) { | 371 | while ((node = ulist_next(parents, &uiter))) { |
@@ -375,8 +376,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
375 | } | 376 | } |
376 | memcpy(new_ref, ref, sizeof(*ref)); | 377 | memcpy(new_ref, ref, sizeof(*ref)); |
377 | new_ref->parent = node->val; | 378 | new_ref->parent = node->val; |
378 | new_ref->inode_list = | 379 | new_ref->inode_list = (struct extent_inode_elem *) |
379 | (struct extent_inode_elem *)node->aux; | 380 | (uintptr_t)node->aux; |
380 | list_add(&new_ref->list, &ref->list); | 381 | list_add(&new_ref->list, &ref->list); |
381 | } | 382 | } |
382 | ulist_reinit(parents); | 383 | ulist_reinit(parents); |
@@ -914,8 +915,8 @@ again: | |||
914 | free_extent_buffer(eb); | 915 | free_extent_buffer(eb); |
915 | } | 916 | } |
916 | ret = ulist_add_merge(refs, ref->parent, | 917 | ret = ulist_add_merge(refs, ref->parent, |
917 | (unsigned long)ref->inode_list, | 918 | (uintptr_t)ref->inode_list, |
918 | (unsigned long *)&eie, GFP_NOFS); | 919 | (u64 *)&eie, GFP_NOFS); |
919 | if (!ret && extent_item_pos) { | 920 | if (!ret && extent_item_pos) { |
920 | /* | 921 | /* |
921 | * we've recorded that parent, so we must extend | 922 | * we've recorded that parent, so we must extend |
@@ -959,7 +960,7 @@ static void free_leaf_list(struct ulist *blocks) | |||
959 | while ((node = ulist_next(blocks, &uiter))) { | 960 | while ((node = ulist_next(blocks, &uiter))) { |
960 | if (!node->aux) | 961 | if (!node->aux) |
961 | continue; | 962 | continue; |
962 | eie = (struct extent_inode_elem *)node->aux; | 963 | eie = (struct extent_inode_elem *)(uintptr_t)node->aux; |
963 | for (; eie; eie = eie_next) { | 964 | for (; eie; eie = eie_next) { |
964 | eie_next = eie->next; | 965 | eie_next = eie->next; |
965 | kfree(eie); | 966 | kfree(eie); |
@@ -1108,26 +1109,80 @@ static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, | |||
1108 | found_key); | 1109 | found_key); |
1109 | } | 1110 | } |
1110 | 1111 | ||
1111 | /* | 1112 | int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, |
1112 | * this iterates to turn a btrfs_inode_ref into a full filesystem path. elements | 1113 | u64 start_off, struct btrfs_path *path, |
1113 | * of the path are separated by '/' and the path is guaranteed to be | 1114 | struct btrfs_inode_extref **ret_extref, |
1114 | * 0-terminated. the path is only given within the current file system. | 1115 | u64 *found_off) |
1115 | * Therefore, it never starts with a '/'. the caller is responsible to provide | 1116 | { |
1116 | * "size" bytes in "dest". the dest buffer will be filled backwards. finally, | 1117 | int ret, slot; |
1117 | * the start point of the resulting string is returned. this pointer is within | 1118 | struct btrfs_key key; |
1118 | * dest, normally. | 1119 | struct btrfs_key found_key; |
1119 | * in case the path buffer would overflow, the pointer is decremented further | 1120 | struct btrfs_inode_extref *extref; |
1120 | * as if output was written to the buffer, though no more output is actually | 1121 | struct extent_buffer *leaf; |
1121 | * generated. that way, the caller can determine how much space would be | 1122 | unsigned long ptr; |
1122 | * required for the path to fit into the buffer. in that case, the returned | 1123 | |
1123 | * value will be smaller than dest. callers must check this! | 1124 | key.objectid = inode_objectid; |
1124 | */ | 1125 | btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); |
1125 | char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | 1126 | key.offset = start_off; |
1126 | struct btrfs_inode_ref *iref, | 1127 | |
1128 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
1129 | if (ret < 0) | ||
1130 | return ret; | ||
1131 | |||
1132 | while (1) { | ||
1133 | leaf = path->nodes[0]; | ||
1134 | slot = path->slots[0]; | ||
1135 | if (slot >= btrfs_header_nritems(leaf)) { | ||
1136 | /* | ||
1137 | * If the item at offset is not found, | ||
1138 | * btrfs_search_slot will point us to the slot | ||
1139 | * where it should be inserted. In our case | ||
1140 | * that will be the slot directly before the | ||
1141 | * next INODE_REF_KEY_V2 item. In the case | ||
1142 | * that we're pointing to the last slot in a | ||
1143 | * leaf, we must move one leaf over. | ||
1144 | */ | ||
1145 | ret = btrfs_next_leaf(root, path); | ||
1146 | if (ret) { | ||
1147 | if (ret >= 1) | ||
1148 | ret = -ENOENT; | ||
1149 | break; | ||
1150 | } | ||
1151 | continue; | ||
1152 | } | ||
1153 | |||
1154 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
1155 | |||
1156 | /* | ||
1157 | * Check that we're still looking at an extended ref key for | ||
1158 | * this particular objectid. If we have different | ||
1159 | * objectid or type then there are no more to be found | ||
1160 | * in the tree and we can exit. | ||
1161 | */ | ||
1162 | ret = -ENOENT; | ||
1163 | if (found_key.objectid != inode_objectid) | ||
1164 | break; | ||
1165 | if (btrfs_key_type(&found_key) != BTRFS_INODE_EXTREF_KEY) | ||
1166 | break; | ||
1167 | |||
1168 | ret = 0; | ||
1169 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
1170 | extref = (struct btrfs_inode_extref *)ptr; | ||
1171 | *ret_extref = extref; | ||
1172 | if (found_off) | ||
1173 | *found_off = found_key.offset; | ||
1174 | break; | ||
1175 | } | ||
1176 | |||
1177 | return ret; | ||
1178 | } | ||
1179 | |||
1180 | static char *ref_to_path(struct btrfs_root *fs_root, | ||
1181 | struct btrfs_path *path, | ||
1182 | u32 name_len, unsigned long name_off, | ||
1127 | struct extent_buffer *eb_in, u64 parent, | 1183 | struct extent_buffer *eb_in, u64 parent, |
1128 | char *dest, u32 size) | 1184 | char *dest, u32 size) |
1129 | { | 1185 | { |
1130 | u32 len; | ||
1131 | int slot; | 1186 | int slot; |
1132 | u64 next_inum; | 1187 | u64 next_inum; |
1133 | int ret; | 1188 | int ret; |
@@ -1135,17 +1190,17 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | |||
1135 | struct extent_buffer *eb = eb_in; | 1190 | struct extent_buffer *eb = eb_in; |
1136 | struct btrfs_key found_key; | 1191 | struct btrfs_key found_key; |
1137 | int leave_spinning = path->leave_spinning; | 1192 | int leave_spinning = path->leave_spinning; |
1193 | struct btrfs_inode_ref *iref; | ||
1138 | 1194 | ||
1139 | if (bytes_left >= 0) | 1195 | if (bytes_left >= 0) |
1140 | dest[bytes_left] = '\0'; | 1196 | dest[bytes_left] = '\0'; |
1141 | 1197 | ||
1142 | path->leave_spinning = 1; | 1198 | path->leave_spinning = 1; |
1143 | while (1) { | 1199 | while (1) { |
1144 | len = btrfs_inode_ref_name_len(eb, iref); | 1200 | bytes_left -= name_len; |
1145 | bytes_left -= len; | ||
1146 | if (bytes_left >= 0) | 1201 | if (bytes_left >= 0) |
1147 | read_extent_buffer(eb, dest + bytes_left, | 1202 | read_extent_buffer(eb, dest + bytes_left, |
1148 | (unsigned long)(iref + 1), len); | 1203 | name_off, name_len); |
1149 | if (eb != eb_in) { | 1204 | if (eb != eb_in) { |
1150 | btrfs_tree_read_unlock_blocking(eb); | 1205 | btrfs_tree_read_unlock_blocking(eb); |
1151 | free_extent_buffer(eb); | 1206 | free_extent_buffer(eb); |
@@ -1155,6 +1210,7 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | |||
1155 | ret = -ENOENT; | 1210 | ret = -ENOENT; |
1156 | if (ret) | 1211 | if (ret) |
1157 | break; | 1212 | break; |
1213 | |||
1158 | next_inum = found_key.offset; | 1214 | next_inum = found_key.offset; |
1159 | 1215 | ||
1160 | /* regular exit ahead */ | 1216 | /* regular exit ahead */ |
@@ -1170,8 +1226,11 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | |||
1170 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); | 1226 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); |
1171 | } | 1227 | } |
1172 | btrfs_release_path(path); | 1228 | btrfs_release_path(path); |
1173 | |||
1174 | iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); | 1229 | iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); |
1230 | |||
1231 | name_len = btrfs_inode_ref_name_len(eb, iref); | ||
1232 | name_off = (unsigned long)(iref + 1); | ||
1233 | |||
1175 | parent = next_inum; | 1234 | parent = next_inum; |
1176 | --bytes_left; | 1235 | --bytes_left; |
1177 | if (bytes_left >= 0) | 1236 | if (bytes_left >= 0) |
@@ -1188,12 +1247,39 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | |||
1188 | } | 1247 | } |
1189 | 1248 | ||
1190 | /* | 1249 | /* |
1250 | * this iterates to turn a btrfs_inode_ref into a full filesystem path. elements | ||
1251 | * of the path are separated by '/' and the path is guaranteed to be | ||
1252 | * 0-terminated. the path is only given within the current file system. | ||
1253 | * Therefore, it never starts with a '/'. the caller is responsible to provide | ||
1254 | * "size" bytes in "dest". the dest buffer will be filled backwards. finally, | ||
1255 | * the start point of the resulting string is returned. this pointer is within | ||
1256 | * dest, normally. | ||
1257 | * in case the path buffer would overflow, the pointer is decremented further | ||
1258 | * as if output was written to the buffer, though no more output is actually | ||
1259 | * generated. that way, the caller can determine how much space would be | ||
1260 | * required for the path to fit into the buffer. in that case, the returned | ||
1261 | * value will be smaller than dest. callers must check this! | ||
1262 | */ | ||
1263 | char *btrfs_iref_to_path(struct btrfs_root *fs_root, | ||
1264 | struct btrfs_path *path, | ||
1265 | struct btrfs_inode_ref *iref, | ||
1266 | struct extent_buffer *eb_in, u64 parent, | ||
1267 | char *dest, u32 size) | ||
1268 | { | ||
1269 | return ref_to_path(fs_root, path, | ||
1270 | btrfs_inode_ref_name_len(eb_in, iref), | ||
1271 | (unsigned long)(iref + 1), | ||
1272 | eb_in, parent, dest, size); | ||
1273 | } | ||
1274 | |||
1275 | /* | ||
1191 | * this makes the path point to (logical EXTENT_ITEM *) | 1276 | * this makes the path point to (logical EXTENT_ITEM *) |
1192 | * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for | 1277 | * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for |
1193 | * tree blocks and <0 on error. | 1278 | * tree blocks and <0 on error. |
1194 | */ | 1279 | */ |
1195 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | 1280 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, |
1196 | struct btrfs_path *path, struct btrfs_key *found_key) | 1281 | struct btrfs_path *path, struct btrfs_key *found_key, |
1282 | u64 *flags_ret) | ||
1197 | { | 1283 | { |
1198 | int ret; | 1284 | int ret; |
1199 | u64 flags; | 1285 | u64 flags; |
@@ -1237,10 +1323,17 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | |||
1237 | (unsigned long long)found_key->objectid, | 1323 | (unsigned long long)found_key->objectid, |
1238 | (unsigned long long)found_key->offset, | 1324 | (unsigned long long)found_key->offset, |
1239 | (unsigned long long)flags, item_size); | 1325 | (unsigned long long)flags, item_size); |
1240 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) | 1326 | |
1241 | return BTRFS_EXTENT_FLAG_TREE_BLOCK; | 1327 | WARN_ON(!flags_ret); |
1242 | if (flags & BTRFS_EXTENT_FLAG_DATA) | 1328 | if (flags_ret) { |
1243 | return BTRFS_EXTENT_FLAG_DATA; | 1329 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) |
1330 | *flags_ret = BTRFS_EXTENT_FLAG_TREE_BLOCK; | ||
1331 | else if (flags & BTRFS_EXTENT_FLAG_DATA) | ||
1332 | *flags_ret = BTRFS_EXTENT_FLAG_DATA; | ||
1333 | else | ||
1334 | BUG_ON(1); | ||
1335 | return 0; | ||
1336 | } | ||
1244 | 1337 | ||
1245 | return -EIO; | 1338 | return -EIO; |
1246 | } | 1339 | } |
@@ -1404,12 +1497,13 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1404 | ULIST_ITER_INIT(&root_uiter); | 1497 | ULIST_ITER_INIT(&root_uiter); |
1405 | while (!ret && (root_node = ulist_next(roots, &root_uiter))) { | 1498 | while (!ret && (root_node = ulist_next(roots, &root_uiter))) { |
1406 | pr_debug("root %llu references leaf %llu, data list " | 1499 | pr_debug("root %llu references leaf %llu, data list " |
1407 | "%#lx\n", root_node->val, ref_node->val, | 1500 | "%#llx\n", root_node->val, ref_node->val, |
1408 | ref_node->aux); | 1501 | (long long)ref_node->aux); |
1409 | ret = iterate_leaf_refs( | 1502 | ret = iterate_leaf_refs((struct extent_inode_elem *) |
1410 | (struct extent_inode_elem *)ref_node->aux, | 1503 | (uintptr_t)ref_node->aux, |
1411 | root_node->val, extent_item_objectid, | 1504 | root_node->val, |
1412 | iterate, ctx); | 1505 | extent_item_objectid, |
1506 | iterate, ctx); | ||
1413 | } | 1507 | } |
1414 | ulist_free(roots); | 1508 | ulist_free(roots); |
1415 | roots = NULL; | 1509 | roots = NULL; |
@@ -1432,15 +1526,15 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, | |||
1432 | { | 1526 | { |
1433 | int ret; | 1527 | int ret; |
1434 | u64 extent_item_pos; | 1528 | u64 extent_item_pos; |
1529 | u64 flags = 0; | ||
1435 | struct btrfs_key found_key; | 1530 | struct btrfs_key found_key; |
1436 | int search_commit_root = path->search_commit_root; | 1531 | int search_commit_root = path->search_commit_root; |
1437 | 1532 | ||
1438 | ret = extent_from_logical(fs_info, logical, path, | 1533 | ret = extent_from_logical(fs_info, logical, path, &found_key, &flags); |
1439 | &found_key); | ||
1440 | btrfs_release_path(path); | 1534 | btrfs_release_path(path); |
1441 | if (ret < 0) | 1535 | if (ret < 0) |
1442 | return ret; | 1536 | return ret; |
1443 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) | 1537 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) |
1444 | return -EINVAL; | 1538 | return -EINVAL; |
1445 | 1539 | ||
1446 | extent_item_pos = logical - found_key.objectid; | 1540 | extent_item_pos = logical - found_key.objectid; |
@@ -1451,9 +1545,12 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, | |||
1451 | return ret; | 1545 | return ret; |
1452 | } | 1546 | } |
1453 | 1547 | ||
1454 | static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | 1548 | typedef int (iterate_irefs_t)(u64 parent, u32 name_len, unsigned long name_off, |
1455 | struct btrfs_path *path, | 1549 | struct extent_buffer *eb, void *ctx); |
1456 | iterate_irefs_t *iterate, void *ctx) | 1550 | |
1551 | static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, | ||
1552 | struct btrfs_path *path, | ||
1553 | iterate_irefs_t *iterate, void *ctx) | ||
1457 | { | 1554 | { |
1458 | int ret = 0; | 1555 | int ret = 0; |
1459 | int slot; | 1556 | int slot; |
@@ -1470,7 +1567,7 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | |||
1470 | while (!ret) { | 1567 | while (!ret) { |
1471 | path->leave_spinning = 1; | 1568 | path->leave_spinning = 1; |
1472 | ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, | 1569 | ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, |
1473 | &found_key); | 1570 | &found_key); |
1474 | if (ret < 0) | 1571 | if (ret < 0) |
1475 | break; | 1572 | break; |
1476 | if (ret) { | 1573 | if (ret) { |
@@ -1498,7 +1595,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | |||
1498 | "tree %llu\n", cur, | 1595 | "tree %llu\n", cur, |
1499 | (unsigned long long)found_key.objectid, | 1596 | (unsigned long long)found_key.objectid, |
1500 | (unsigned long long)fs_root->objectid); | 1597 | (unsigned long long)fs_root->objectid); |
1501 | ret = iterate(parent, iref, eb, ctx); | 1598 | ret = iterate(parent, name_len, |
1599 | (unsigned long)(iref + 1), eb, ctx); | ||
1502 | if (ret) | 1600 | if (ret) |
1503 | break; | 1601 | break; |
1504 | len = sizeof(*iref) + name_len; | 1602 | len = sizeof(*iref) + name_len; |
@@ -1513,12 +1611,98 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | |||
1513 | return ret; | 1611 | return ret; |
1514 | } | 1612 | } |
1515 | 1613 | ||
1614 | static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, | ||
1615 | struct btrfs_path *path, | ||
1616 | iterate_irefs_t *iterate, void *ctx) | ||
1617 | { | ||
1618 | int ret; | ||
1619 | int slot; | ||
1620 | u64 offset = 0; | ||
1621 | u64 parent; | ||
1622 | int found = 0; | ||
1623 | struct extent_buffer *eb; | ||
1624 | struct btrfs_inode_extref *extref; | ||
1625 | struct extent_buffer *leaf; | ||
1626 | u32 item_size; | ||
1627 | u32 cur_offset; | ||
1628 | unsigned long ptr; | ||
1629 | |||
1630 | while (1) { | ||
1631 | ret = btrfs_find_one_extref(fs_root, inum, offset, path, &extref, | ||
1632 | &offset); | ||
1633 | if (ret < 0) | ||
1634 | break; | ||
1635 | if (ret) { | ||
1636 | ret = found ? 0 : -ENOENT; | ||
1637 | break; | ||
1638 | } | ||
1639 | ++found; | ||
1640 | |||
1641 | slot = path->slots[0]; | ||
1642 | eb = path->nodes[0]; | ||
1643 | /* make sure we can use eb after releasing the path */ | ||
1644 | atomic_inc(&eb->refs); | ||
1645 | |||
1646 | btrfs_tree_read_lock(eb); | ||
1647 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); | ||
1648 | btrfs_release_path(path); | ||
1649 | |||
1650 | leaf = path->nodes[0]; | ||
1651 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
1652 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
1653 | cur_offset = 0; | ||
1654 | |||
1655 | while (cur_offset < item_size) { | ||
1656 | u32 name_len; | ||
1657 | |||
1658 | extref = (struct btrfs_inode_extref *)(ptr + cur_offset); | ||
1659 | parent = btrfs_inode_extref_parent(eb, extref); | ||
1660 | name_len = btrfs_inode_extref_name_len(eb, extref); | ||
1661 | ret = iterate(parent, name_len, | ||
1662 | (unsigned long)&extref->name, eb, ctx); | ||
1663 | if (ret) | ||
1664 | break; | ||
1665 | |||
1666 | cur_offset += btrfs_inode_extref_name_len(leaf, extref); | ||
1667 | cur_offset += sizeof(*extref); | ||
1668 | } | ||
1669 | btrfs_tree_read_unlock_blocking(eb); | ||
1670 | free_extent_buffer(eb); | ||
1671 | |||
1672 | offset++; | ||
1673 | } | ||
1674 | |||
1675 | btrfs_release_path(path); | ||
1676 | |||
1677 | return ret; | ||
1678 | } | ||
1679 | |||
1680 | static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | ||
1681 | struct btrfs_path *path, iterate_irefs_t *iterate, | ||
1682 | void *ctx) | ||
1683 | { | ||
1684 | int ret; | ||
1685 | int found_refs = 0; | ||
1686 | |||
1687 | ret = iterate_inode_refs(inum, fs_root, path, iterate, ctx); | ||
1688 | if (!ret) | ||
1689 | ++found_refs; | ||
1690 | else if (ret != -ENOENT) | ||
1691 | return ret; | ||
1692 | |||
1693 | ret = iterate_inode_extrefs(inum, fs_root, path, iterate, ctx); | ||
1694 | if (ret == -ENOENT && found_refs) | ||
1695 | return 0; | ||
1696 | |||
1697 | return ret; | ||
1698 | } | ||
1699 | |||
1516 | /* | 1700 | /* |
1517 | * returns 0 if the path could be dumped (probably truncated) | 1701 | * returns 0 if the path could be dumped (probably truncated) |
1518 | * returns <0 in case of an error | 1702 | * returns <0 in case of an error |
1519 | */ | 1703 | */ |
1520 | static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref, | 1704 | static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off, |
1521 | struct extent_buffer *eb, void *ctx) | 1705 | struct extent_buffer *eb, void *ctx) |
1522 | { | 1706 | { |
1523 | struct inode_fs_paths *ipath = ctx; | 1707 | struct inode_fs_paths *ipath = ctx; |
1524 | char *fspath; | 1708 | char *fspath; |
@@ -1531,20 +1715,17 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref, | |||
1531 | ipath->fspath->bytes_left - s_ptr : 0; | 1715 | ipath->fspath->bytes_left - s_ptr : 0; |
1532 | 1716 | ||
1533 | fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr; | 1717 | fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr; |
1534 | fspath = btrfs_iref_to_path(ipath->fs_root, ipath->btrfs_path, iref, eb, | 1718 | fspath = ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len, |
1535 | inum, fspath_min, bytes_left); | 1719 | name_off, eb, inum, fspath_min, |
1720 | bytes_left); | ||
1536 | if (IS_ERR(fspath)) | 1721 | if (IS_ERR(fspath)) |
1537 | return PTR_ERR(fspath); | 1722 | return PTR_ERR(fspath); |
1538 | 1723 | ||
1539 | if (fspath > fspath_min) { | 1724 | if (fspath > fspath_min) { |
1540 | pr_debug("path resolved: %s\n", fspath); | ||
1541 | ipath->fspath->val[i] = (u64)(unsigned long)fspath; | 1725 | ipath->fspath->val[i] = (u64)(unsigned long)fspath; |
1542 | ++ipath->fspath->elem_cnt; | 1726 | ++ipath->fspath->elem_cnt; |
1543 | ipath->fspath->bytes_left = fspath - fspath_min; | 1727 | ipath->fspath->bytes_left = fspath - fspath_min; |
1544 | } else { | 1728 | } else { |
1545 | pr_debug("missed path, not enough space. missing bytes: %lu, " | ||
1546 | "constructed so far: %s\n", | ||
1547 | (unsigned long)(fspath_min - fspath), fspath_min); | ||
1548 | ++ipath->fspath->elem_missed; | 1729 | ++ipath->fspath->elem_missed; |
1549 | ipath->fspath->bytes_missing += fspath_min - fspath; | 1730 | ipath->fspath->bytes_missing += fspath_min - fspath; |
1550 | ipath->fspath->bytes_left = 0; | 1731 | ipath->fspath->bytes_left = 0; |
@@ -1566,7 +1747,7 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref, | |||
1566 | int paths_from_inode(u64 inum, struct inode_fs_paths *ipath) | 1747 | int paths_from_inode(u64 inum, struct inode_fs_paths *ipath) |
1567 | { | 1748 | { |
1568 | return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path, | 1749 | return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path, |
1569 | inode_to_path, ipath); | 1750 | inode_to_path, ipath); |
1570 | } | 1751 | } |
1571 | 1752 | ||
1572 | struct btrfs_data_container *init_data_container(u32 total_bytes) | 1753 | struct btrfs_data_container *init_data_container(u32 total_bytes) |
@@ -1575,7 +1756,7 @@ struct btrfs_data_container *init_data_container(u32 total_bytes) | |||
1575 | size_t alloc_bytes; | 1756 | size_t alloc_bytes; |
1576 | 1757 | ||
1577 | alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); | 1758 | alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); |
1578 | data = kmalloc(alloc_bytes, GFP_NOFS); | 1759 | data = vmalloc(alloc_bytes); |
1579 | if (!data) | 1760 | if (!data) |
1580 | return ERR_PTR(-ENOMEM); | 1761 | return ERR_PTR(-ENOMEM); |
1581 | 1762 | ||
@@ -1626,6 +1807,6 @@ void free_ipath(struct inode_fs_paths *ipath) | |||
1626 | { | 1807 | { |
1627 | if (!ipath) | 1808 | if (!ipath) |
1628 | return; | 1809 | return; |
1629 | kfree(ipath->fspath); | 1810 | vfree(ipath->fspath); |
1630 | kfree(ipath); | 1811 | kfree(ipath); |
1631 | } | 1812 | } |
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 032f4dc7eab8..e75533043a5f 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
@@ -33,14 +33,13 @@ struct inode_fs_paths { | |||
33 | 33 | ||
34 | typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, | 34 | typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, |
35 | void *ctx); | 35 | void *ctx); |
36 | typedef int (iterate_irefs_t)(u64 parent, struct btrfs_inode_ref *iref, | ||
37 | struct extent_buffer *eb, void *ctx); | ||
38 | 36 | ||
39 | int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, | 37 | int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, |
40 | struct btrfs_path *path); | 38 | struct btrfs_path *path); |
41 | 39 | ||
42 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | 40 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, |
43 | struct btrfs_path *path, struct btrfs_key *found_key); | 41 | struct btrfs_path *path, struct btrfs_key *found_key, |
42 | u64 *flags); | ||
44 | 43 | ||
45 | int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, | 44 | int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, |
46 | struct btrfs_extent_item *ei, u32 item_size, | 45 | struct btrfs_extent_item *ei, u32 item_size, |
@@ -69,4 +68,9 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, | |||
69 | struct btrfs_path *path); | 68 | struct btrfs_path *path); |
70 | void free_ipath(struct inode_fs_paths *ipath); | 69 | void free_ipath(struct inode_fs_paths *ipath); |
71 | 70 | ||
71 | int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, | ||
72 | u64 start_off, struct btrfs_path *path, | ||
73 | struct btrfs_inode_extref **ret_extref, | ||
74 | u64 *found_off); | ||
75 | |||
72 | #endif | 76 | #endif |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 5b2ad6bc4fe7..ed8ca7ca5eff 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -38,6 +38,7 @@ | |||
38 | #define BTRFS_INODE_DELALLOC_META_RESERVED 4 | 38 | #define BTRFS_INODE_DELALLOC_META_RESERVED 4 |
39 | #define BTRFS_INODE_HAS_ORPHAN_ITEM 5 | 39 | #define BTRFS_INODE_HAS_ORPHAN_ITEM 5 |
40 | #define BTRFS_INODE_HAS_ASYNC_EXTENT 6 | 40 | #define BTRFS_INODE_HAS_ASYNC_EXTENT 6 |
41 | #define BTRFS_INODE_NEEDS_FULL_SYNC 7 | ||
41 | 42 | ||
42 | /* in memory btrfs inode */ | 43 | /* in memory btrfs inode */ |
43 | struct btrfs_inode { | 44 | struct btrfs_inode { |
@@ -143,6 +144,9 @@ struct btrfs_inode { | |||
143 | /* flags field from the on disk inode */ | 144 | /* flags field from the on disk inode */ |
144 | u32 flags; | 145 | u32 flags; |
145 | 146 | ||
147 | /* a local copy of root's last_log_commit */ | ||
148 | unsigned long last_log_commit; | ||
149 | |||
146 | /* | 150 | /* |
147 | * Counters to keep track of the number of extent item's we may use due | 151 | * Counters to keep track of the number of extent item's we may use due |
148 | * to delalloc and such. outstanding_extents is the number of extent | 152 | * to delalloc and such. outstanding_extents is the number of extent |
@@ -202,15 +206,10 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode) | |||
202 | 206 | ||
203 | static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) | 207 | static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) |
204 | { | 208 | { |
205 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
206 | int ret = 0; | ||
207 | |||
208 | mutex_lock(&root->log_mutex); | ||
209 | if (BTRFS_I(inode)->logged_trans == generation && | 209 | if (BTRFS_I(inode)->logged_trans == generation && |
210 | BTRFS_I(inode)->last_sub_trans <= root->last_log_commit) | 210 | BTRFS_I(inode)->last_sub_trans <= BTRFS_I(inode)->last_log_commit) |
211 | ret = 1; | 211 | return 1; |
212 | mutex_unlock(&root->log_mutex); | 212 | return 0; |
213 | return ret; | ||
214 | } | 213 | } |
215 | 214 | ||
216 | #endif | 215 | #endif |
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 9197e2e33407..5a3e45db642a 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c | |||
@@ -37,8 +37,9 @@ | |||
37 | * the file system was mounted, (i.e., they have been | 37 | * the file system was mounted, (i.e., they have been |
38 | * referenced by the super block) or they have been | 38 | * referenced by the super block) or they have been |
39 | * written since then and the write completion callback | 39 | * written since then and the write completion callback |
40 | * was called and a FLUSH request to the device where | 40 | * was called and no write error was indicated and a |
41 | * these blocks are located was received and completed. | 41 | * FLUSH request to the device where these blocks are |
42 | * located was received and completed. | ||
42 | * 2b. All referenced blocks need to have a generation | 43 | * 2b. All referenced blocks need to have a generation |
43 | * number which is equal to the parent's number. | 44 | * number which is equal to the parent's number. |
44 | * | 45 | * |
@@ -2601,6 +2602,17 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, | |||
2601 | (unsigned long long)l->block_ref_to->dev_bytenr, | 2602 | (unsigned long long)l->block_ref_to->dev_bytenr, |
2602 | l->block_ref_to->mirror_num); | 2603 | l->block_ref_to->mirror_num); |
2603 | ret = -1; | 2604 | ret = -1; |
2605 | } else if (l->block_ref_to->iodone_w_error) { | ||
2606 | printk(KERN_INFO "btrfs: attempt to write superblock" | ||
2607 | " which references block %c @%llu (%s/%llu/%d)" | ||
2608 | " which has write error!\n", | ||
2609 | btrfsic_get_block_type(state, l->block_ref_to), | ||
2610 | (unsigned long long) | ||
2611 | l->block_ref_to->logical_bytenr, | ||
2612 | l->block_ref_to->dev_state->name, | ||
2613 | (unsigned long long)l->block_ref_to->dev_bytenr, | ||
2614 | l->block_ref_to->mirror_num); | ||
2615 | ret = -1; | ||
2604 | } else if (l->parent_generation != | 2616 | } else if (l->parent_generation != |
2605 | l->block_ref_to->generation && | 2617 | l->block_ref_to->generation && |
2606 | BTRFSIC_GENERATION_UNKNOWN != | 2618 | BTRFSIC_GENERATION_UNKNOWN != |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 43d1c5a3a030..c6467aa88bee 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -577,6 +577,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
577 | u64 em_start; | 577 | u64 em_start; |
578 | struct extent_map *em; | 578 | struct extent_map *em; |
579 | int ret = -ENOMEM; | 579 | int ret = -ENOMEM; |
580 | int faili = 0; | ||
580 | u32 *sums; | 581 | u32 *sums; |
581 | 582 | ||
582 | tree = &BTRFS_I(inode)->io_tree; | 583 | tree = &BTRFS_I(inode)->io_tree; |
@@ -626,9 +627,13 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
626 | for (pg_index = 0; pg_index < nr_pages; pg_index++) { | 627 | for (pg_index = 0; pg_index < nr_pages; pg_index++) { |
627 | cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS | | 628 | cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS | |
628 | __GFP_HIGHMEM); | 629 | __GFP_HIGHMEM); |
629 | if (!cb->compressed_pages[pg_index]) | 630 | if (!cb->compressed_pages[pg_index]) { |
631 | faili = pg_index - 1; | ||
632 | ret = -ENOMEM; | ||
630 | goto fail2; | 633 | goto fail2; |
634 | } | ||
631 | } | 635 | } |
636 | faili = nr_pages - 1; | ||
632 | cb->nr_pages = nr_pages; | 637 | cb->nr_pages = nr_pages; |
633 | 638 | ||
634 | add_ra_bio_pages(inode, em_start + em_len, cb); | 639 | add_ra_bio_pages(inode, em_start + em_len, cb); |
@@ -713,8 +718,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
713 | return 0; | 718 | return 0; |
714 | 719 | ||
715 | fail2: | 720 | fail2: |
716 | for (pg_index = 0; pg_index < nr_pages; pg_index++) | 721 | while (faili >= 0) { |
717 | free_page((unsigned long)cb->compressed_pages[pg_index]); | 722 | __free_page(cb->compressed_pages[faili]); |
723 | faili--; | ||
724 | } | ||
718 | 725 | ||
719 | kfree(cb->compressed_pages); | 726 | kfree(cb->compressed_pages); |
720 | fail1: | 727 | fail1: |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6d183f60d63a..b33436211000 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -4402,149 +4402,6 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans, | |||
4402 | } | 4402 | } |
4403 | 4403 | ||
4404 | /* | 4404 | /* |
4405 | * Given a key and some data, insert items into the tree. | ||
4406 | * This does all the path init required, making room in the tree if needed. | ||
4407 | * Returns the number of keys that were inserted. | ||
4408 | */ | ||
4409 | int btrfs_insert_some_items(struct btrfs_trans_handle *trans, | ||
4410 | struct btrfs_root *root, | ||
4411 | struct btrfs_path *path, | ||
4412 | struct btrfs_key *cpu_key, u32 *data_size, | ||
4413 | int nr) | ||
4414 | { | ||
4415 | struct extent_buffer *leaf; | ||
4416 | struct btrfs_item *item; | ||
4417 | int ret = 0; | ||
4418 | int slot; | ||
4419 | int i; | ||
4420 | u32 nritems; | ||
4421 | u32 total_data = 0; | ||
4422 | u32 total_size = 0; | ||
4423 | unsigned int data_end; | ||
4424 | struct btrfs_disk_key disk_key; | ||
4425 | struct btrfs_key found_key; | ||
4426 | struct btrfs_map_token token; | ||
4427 | |||
4428 | btrfs_init_map_token(&token); | ||
4429 | |||
4430 | for (i = 0; i < nr; i++) { | ||
4431 | if (total_size + data_size[i] + sizeof(struct btrfs_item) > | ||
4432 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
4433 | break; | ||
4434 | nr = i; | ||
4435 | } | ||
4436 | total_data += data_size[i]; | ||
4437 | total_size += data_size[i] + sizeof(struct btrfs_item); | ||
4438 | } | ||
4439 | BUG_ON(nr == 0); | ||
4440 | |||
4441 | ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); | ||
4442 | if (ret == 0) | ||
4443 | return -EEXIST; | ||
4444 | if (ret < 0) | ||
4445 | goto out; | ||
4446 | |||
4447 | leaf = path->nodes[0]; | ||
4448 | |||
4449 | nritems = btrfs_header_nritems(leaf); | ||
4450 | data_end = leaf_data_end(root, leaf); | ||
4451 | |||
4452 | if (btrfs_leaf_free_space(root, leaf) < total_size) { | ||
4453 | for (i = nr; i >= 0; i--) { | ||
4454 | total_data -= data_size[i]; | ||
4455 | total_size -= data_size[i] + sizeof(struct btrfs_item); | ||
4456 | if (total_size < btrfs_leaf_free_space(root, leaf)) | ||
4457 | break; | ||
4458 | } | ||
4459 | nr = i; | ||
4460 | } | ||
4461 | |||
4462 | slot = path->slots[0]; | ||
4463 | BUG_ON(slot < 0); | ||
4464 | |||
4465 | if (slot != nritems) { | ||
4466 | unsigned int old_data = btrfs_item_end_nr(leaf, slot); | ||
4467 | |||
4468 | item = btrfs_item_nr(leaf, slot); | ||
4469 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
4470 | |||
4471 | /* figure out how many keys we can insert in here */ | ||
4472 | total_data = data_size[0]; | ||
4473 | for (i = 1; i < nr; i++) { | ||
4474 | if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0) | ||
4475 | break; | ||
4476 | total_data += data_size[i]; | ||
4477 | } | ||
4478 | nr = i; | ||
4479 | |||
4480 | if (old_data < data_end) { | ||
4481 | btrfs_print_leaf(root, leaf); | ||
4482 | printk(KERN_CRIT "slot %d old_data %d data_end %d\n", | ||
4483 | slot, old_data, data_end); | ||
4484 | BUG_ON(1); | ||
4485 | } | ||
4486 | /* | ||
4487 | * item0..itemN ... dataN.offset..dataN.size .. data0.size | ||
4488 | */ | ||
4489 | /* first correct the data pointers */ | ||
4490 | for (i = slot; i < nritems; i++) { | ||
4491 | u32 ioff; | ||
4492 | |||
4493 | item = btrfs_item_nr(leaf, i); | ||
4494 | ioff = btrfs_token_item_offset(leaf, item, &token); | ||
4495 | btrfs_set_token_item_offset(leaf, item, | ||
4496 | ioff - total_data, &token); | ||
4497 | } | ||
4498 | /* shift the items */ | ||
4499 | memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), | ||
4500 | btrfs_item_nr_offset(slot), | ||
4501 | (nritems - slot) * sizeof(struct btrfs_item)); | ||
4502 | |||
4503 | /* shift the data */ | ||
4504 | memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + | ||
4505 | data_end - total_data, btrfs_leaf_data(leaf) + | ||
4506 | data_end, old_data - data_end); | ||
4507 | data_end = old_data; | ||
4508 | } else { | ||
4509 | /* | ||
4510 | * this sucks but it has to be done, if we are inserting at | ||
4511 | * the end of the leaf only insert 1 of the items, since we | ||
4512 | * have no way of knowing whats on the next leaf and we'd have | ||
4513 | * to drop our current locks to figure it out | ||
4514 | */ | ||
4515 | nr = 1; | ||
4516 | } | ||
4517 | |||
4518 | /* setup the item for the new data */ | ||
4519 | for (i = 0; i < nr; i++) { | ||
4520 | btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); | ||
4521 | btrfs_set_item_key(leaf, &disk_key, slot + i); | ||
4522 | item = btrfs_item_nr(leaf, slot + i); | ||
4523 | btrfs_set_token_item_offset(leaf, item, | ||
4524 | data_end - data_size[i], &token); | ||
4525 | data_end -= data_size[i]; | ||
4526 | btrfs_set_token_item_size(leaf, item, data_size[i], &token); | ||
4527 | } | ||
4528 | btrfs_set_header_nritems(leaf, nritems + nr); | ||
4529 | btrfs_mark_buffer_dirty(leaf); | ||
4530 | |||
4531 | ret = 0; | ||
4532 | if (slot == 0) { | ||
4533 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); | ||
4534 | fixup_low_keys(trans, root, path, &disk_key, 1); | ||
4535 | } | ||
4536 | |||
4537 | if (btrfs_leaf_free_space(root, leaf) < 0) { | ||
4538 | btrfs_print_leaf(root, leaf); | ||
4539 | BUG(); | ||
4540 | } | ||
4541 | out: | ||
4542 | if (!ret) | ||
4543 | ret = nr; | ||
4544 | return ret; | ||
4545 | } | ||
4546 | |||
4547 | /* | ||
4548 | * this is a helper for btrfs_insert_empty_items, the main goal here is | 4405 | * this is a helper for btrfs_insert_empty_items, the main goal here is |
4549 | * to save stack depth by doing the bulk of the work in a function | 4406 | * to save stack depth by doing the bulk of the work in a function |
4550 | * that doesn't call btrfs_search_slot | 4407 | * that doesn't call btrfs_search_slot |
@@ -5073,6 +4930,7 @@ static void tree_move_down(struct btrfs_root *root, | |||
5073 | struct btrfs_path *path, | 4930 | struct btrfs_path *path, |
5074 | int *level, int root_level) | 4931 | int *level, int root_level) |
5075 | { | 4932 | { |
4933 | BUG_ON(*level == 0); | ||
5076 | path->nodes[*level - 1] = read_node_slot(root, path->nodes[*level], | 4934 | path->nodes[*level - 1] = read_node_slot(root, path->nodes[*level], |
5077 | path->slots[*level]); | 4935 | path->slots[*level]); |
5078 | path->slots[*level - 1] = 0; | 4936 | path->slots[*level - 1] = 0; |
@@ -5089,7 +4947,7 @@ static int tree_move_next_or_upnext(struct btrfs_root *root, | |||
5089 | 4947 | ||
5090 | path->slots[*level]++; | 4948 | path->slots[*level]++; |
5091 | 4949 | ||
5092 | while (path->slots[*level] == nritems) { | 4950 | while (path->slots[*level] >= nritems) { |
5093 | if (*level == root_level) | 4951 | if (*level == root_level) |
5094 | return -1; | 4952 | return -1; |
5095 | 4953 | ||
@@ -5433,9 +5291,11 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
5433 | goto out; | 5291 | goto out; |
5434 | advance_right = ADVANCE; | 5292 | advance_right = ADVANCE; |
5435 | } else { | 5293 | } else { |
5294 | WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); | ||
5436 | ret = tree_compare_item(left_root, left_path, | 5295 | ret = tree_compare_item(left_root, left_path, |
5437 | right_path, tmp_buf); | 5296 | right_path, tmp_buf); |
5438 | if (ret) { | 5297 | if (ret) { |
5298 | WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); | ||
5439 | ret = changed_cb(left_root, right_root, | 5299 | ret = changed_cb(left_root, right_root, |
5440 | left_path, right_path, | 5300 | left_path, right_path, |
5441 | &left_key, | 5301 | &left_key, |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9821b672f5a2..926c9ffc66d9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -154,6 +154,13 @@ struct btrfs_ordered_sum; | |||
154 | */ | 154 | */ |
155 | #define BTRFS_NAME_LEN 255 | 155 | #define BTRFS_NAME_LEN 255 |
156 | 156 | ||
157 | /* | ||
158 | * Theoretical limit is larger, but we keep this down to a sane | ||
159 | * value. That should limit greatly the possibility of collisions on | ||
160 | * inode ref items. | ||
161 | */ | ||
162 | #define BTRFS_LINK_MAX 65535U | ||
163 | |||
157 | /* 32 bytes in various csum fields */ | 164 | /* 32 bytes in various csum fields */ |
158 | #define BTRFS_CSUM_SIZE 32 | 165 | #define BTRFS_CSUM_SIZE 32 |
159 | 166 | ||
@@ -489,6 +496,8 @@ struct btrfs_super_block { | |||
489 | */ | 496 | */ |
490 | #define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5) | 497 | #define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5) |
491 | 498 | ||
499 | #define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) | ||
500 | |||
492 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL | 501 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL |
493 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL | 502 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL |
494 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ | 503 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ |
@@ -496,7 +505,8 @@ struct btrfs_super_block { | |||
496 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ | 505 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ |
497 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ | 506 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ |
498 | BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ | 507 | BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ |
499 | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) | 508 | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ |
509 | BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) | ||
500 | 510 | ||
501 | /* | 511 | /* |
502 | * A leaf is full of items. offset and size tell us where to find | 512 | * A leaf is full of items. offset and size tell us where to find |
@@ -643,6 +653,14 @@ struct btrfs_inode_ref { | |||
643 | /* name goes here */ | 653 | /* name goes here */ |
644 | } __attribute__ ((__packed__)); | 654 | } __attribute__ ((__packed__)); |
645 | 655 | ||
656 | struct btrfs_inode_extref { | ||
657 | __le64 parent_objectid; | ||
658 | __le64 index; | ||
659 | __le16 name_len; | ||
660 | __u8 name[0]; | ||
661 | /* name goes here */ | ||
662 | } __attribute__ ((__packed__)); | ||
663 | |||
646 | struct btrfs_timespec { | 664 | struct btrfs_timespec { |
647 | __le64 sec; | 665 | __le64 sec; |
648 | __le32 nsec; | 666 | __le32 nsec; |
@@ -1028,12 +1046,22 @@ struct btrfs_space_info { | |||
1028 | wait_queue_head_t wait; | 1046 | wait_queue_head_t wait; |
1029 | }; | 1047 | }; |
1030 | 1048 | ||
1049 | #define BTRFS_BLOCK_RSV_GLOBAL 1 | ||
1050 | #define BTRFS_BLOCK_RSV_DELALLOC 2 | ||
1051 | #define BTRFS_BLOCK_RSV_TRANS 3 | ||
1052 | #define BTRFS_BLOCK_RSV_CHUNK 4 | ||
1053 | #define BTRFS_BLOCK_RSV_DELOPS 5 | ||
1054 | #define BTRFS_BLOCK_RSV_EMPTY 6 | ||
1055 | #define BTRFS_BLOCK_RSV_TEMP 7 | ||
1056 | |||
1031 | struct btrfs_block_rsv { | 1057 | struct btrfs_block_rsv { |
1032 | u64 size; | 1058 | u64 size; |
1033 | u64 reserved; | 1059 | u64 reserved; |
1034 | struct btrfs_space_info *space_info; | 1060 | struct btrfs_space_info *space_info; |
1035 | spinlock_t lock; | 1061 | spinlock_t lock; |
1036 | unsigned int full; | 1062 | unsigned short full; |
1063 | unsigned short type; | ||
1064 | unsigned short failfast; | ||
1037 | }; | 1065 | }; |
1038 | 1066 | ||
1039 | /* | 1067 | /* |
@@ -1127,6 +1155,9 @@ struct btrfs_block_group_cache { | |||
1127 | * Today it will only have one thing on it, but that may change | 1155 | * Today it will only have one thing on it, but that may change |
1128 | */ | 1156 | */ |
1129 | struct list_head cluster_list; | 1157 | struct list_head cluster_list; |
1158 | |||
1159 | /* For delayed block group creation */ | ||
1160 | struct list_head new_bg_list; | ||
1130 | }; | 1161 | }; |
1131 | 1162 | ||
1132 | /* delayed seq elem */ | 1163 | /* delayed seq elem */ |
@@ -1240,7 +1271,6 @@ struct btrfs_fs_info { | |||
1240 | struct mutex reloc_mutex; | 1271 | struct mutex reloc_mutex; |
1241 | 1272 | ||
1242 | struct list_head trans_list; | 1273 | struct list_head trans_list; |
1243 | struct list_head hashers; | ||
1244 | struct list_head dead_roots; | 1274 | struct list_head dead_roots; |
1245 | struct list_head caching_block_groups; | 1275 | struct list_head caching_block_groups; |
1246 | 1276 | ||
@@ -1366,9 +1396,6 @@ struct btrfs_fs_info { | |||
1366 | struct rb_root defrag_inodes; | 1396 | struct rb_root defrag_inodes; |
1367 | atomic_t defrag_running; | 1397 | atomic_t defrag_running; |
1368 | 1398 | ||
1369 | spinlock_t ref_cache_lock; | ||
1370 | u64 total_ref_cache_size; | ||
1371 | |||
1372 | /* | 1399 | /* |
1373 | * these three are in extended format (availability of single | 1400 | * these three are in extended format (availability of single |
1374 | * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other | 1401 | * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other |
@@ -1441,6 +1468,8 @@ struct btrfs_fs_info { | |||
1441 | 1468 | ||
1442 | /* next backup root to be overwritten */ | 1469 | /* next backup root to be overwritten */ |
1443 | int backup_root_index; | 1470 | int backup_root_index; |
1471 | |||
1472 | int num_tolerated_disk_barrier_failures; | ||
1444 | }; | 1473 | }; |
1445 | 1474 | ||
1446 | /* | 1475 | /* |
@@ -1481,9 +1510,9 @@ struct btrfs_root { | |||
1481 | wait_queue_head_t log_commit_wait[2]; | 1510 | wait_queue_head_t log_commit_wait[2]; |
1482 | atomic_t log_writers; | 1511 | atomic_t log_writers; |
1483 | atomic_t log_commit[2]; | 1512 | atomic_t log_commit[2]; |
1513 | atomic_t log_batch; | ||
1484 | unsigned long log_transid; | 1514 | unsigned long log_transid; |
1485 | unsigned long last_log_commit; | 1515 | unsigned long last_log_commit; |
1486 | unsigned long log_batch; | ||
1487 | pid_t log_start_pid; | 1516 | pid_t log_start_pid; |
1488 | bool log_multiple_pids; | 1517 | bool log_multiple_pids; |
1489 | 1518 | ||
@@ -1592,6 +1621,7 @@ struct btrfs_ioctl_defrag_range_args { | |||
1592 | */ | 1621 | */ |
1593 | #define BTRFS_INODE_ITEM_KEY 1 | 1622 | #define BTRFS_INODE_ITEM_KEY 1 |
1594 | #define BTRFS_INODE_REF_KEY 12 | 1623 | #define BTRFS_INODE_REF_KEY 12 |
1624 | #define BTRFS_INODE_EXTREF_KEY 13 | ||
1595 | #define BTRFS_XATTR_ITEM_KEY 24 | 1625 | #define BTRFS_XATTR_ITEM_KEY 24 |
1596 | #define BTRFS_ORPHAN_ITEM_KEY 48 | 1626 | #define BTRFS_ORPHAN_ITEM_KEY 48 |
1597 | /* reserve 2-15 close to the inode for later flexibility */ | 1627 | /* reserve 2-15 close to the inode for later flexibility */ |
@@ -1978,6 +2008,13 @@ BTRFS_SETGET_STACK_FUNCS(block_group_flags, | |||
1978 | BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); | 2008 | BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); |
1979 | BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); | 2009 | BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); |
1980 | 2010 | ||
2011 | /* struct btrfs_inode_extref */ | ||
2012 | BTRFS_SETGET_FUNCS(inode_extref_parent, struct btrfs_inode_extref, | ||
2013 | parent_objectid, 64); | ||
2014 | BTRFS_SETGET_FUNCS(inode_extref_name_len, struct btrfs_inode_extref, | ||
2015 | name_len, 16); | ||
2016 | BTRFS_SETGET_FUNCS(inode_extref_index, struct btrfs_inode_extref, index, 64); | ||
2017 | |||
1981 | /* struct btrfs_inode_item */ | 2018 | /* struct btrfs_inode_item */ |
1982 | BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); | 2019 | BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); |
1983 | BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64); | 2020 | BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64); |
@@ -2858,6 +2895,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
2858 | u64 size); | 2895 | u64 size); |
2859 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 2896 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
2860 | struct btrfs_root *root, u64 group_start); | 2897 | struct btrfs_root *root, u64 group_start); |
2898 | void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | ||
2899 | struct btrfs_root *root); | ||
2861 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2900 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
2862 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | 2901 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); |
2863 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2902 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
@@ -2874,8 +2913,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes); | |||
2874 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); | 2913 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); |
2875 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes); | 2914 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes); |
2876 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes); | 2915 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes); |
2877 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); | 2916 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type); |
2878 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); | 2917 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root, |
2918 | unsigned short type); | ||
2879 | void btrfs_free_block_rsv(struct btrfs_root *root, | 2919 | void btrfs_free_block_rsv(struct btrfs_root *root, |
2880 | struct btrfs_block_rsv *rsv); | 2920 | struct btrfs_block_rsv *rsv); |
2881 | int btrfs_block_rsv_add(struct btrfs_root *root, | 2921 | int btrfs_block_rsv_add(struct btrfs_root *root, |
@@ -3172,12 +3212,12 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
3172 | struct btrfs_root *root, | 3212 | struct btrfs_root *root, |
3173 | const char *name, int name_len, | 3213 | const char *name, int name_len, |
3174 | u64 inode_objectid, u64 ref_objectid, u64 *index); | 3214 | u64 inode_objectid, u64 ref_objectid, u64 *index); |
3175 | struct btrfs_inode_ref * | 3215 | int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans, |
3176 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | 3216 | struct btrfs_root *root, |
3177 | struct btrfs_root *root, | 3217 | struct btrfs_path *path, |
3178 | struct btrfs_path *path, | 3218 | const char *name, int name_len, |
3179 | const char *name, int name_len, | 3219 | u64 inode_objectid, u64 ref_objectid, int mod, |
3180 | u64 inode_objectid, u64 ref_objectid, int mod); | 3220 | u64 *ret_index); |
3181 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | 3221 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, |
3182 | struct btrfs_root *root, | 3222 | struct btrfs_root *root, |
3183 | struct btrfs_path *path, u64 objectid); | 3223 | struct btrfs_path *path, u64 objectid); |
@@ -3185,6 +3225,19 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3185 | *root, struct btrfs_path *path, | 3225 | *root, struct btrfs_path *path, |
3186 | struct btrfs_key *location, int mod); | 3226 | struct btrfs_key *location, int mod); |
3187 | 3227 | ||
3228 | struct btrfs_inode_extref * | ||
3229 | btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, | ||
3230 | struct btrfs_root *root, | ||
3231 | struct btrfs_path *path, | ||
3232 | const char *name, int name_len, | ||
3233 | u64 inode_objectid, u64 ref_objectid, int ins_len, | ||
3234 | int cow); | ||
3235 | |||
3236 | int btrfs_find_name_in_ext_backref(struct btrfs_path *path, | ||
3237 | u64 ref_objectid, const char *name, | ||
3238 | int name_len, | ||
3239 | struct btrfs_inode_extref **extref_ret); | ||
3240 | |||
3188 | /* file-item.c */ | 3241 | /* file-item.c */ |
3189 | int btrfs_del_csums(struct btrfs_trans_handle *trans, | 3242 | int btrfs_del_csums(struct btrfs_trans_handle *trans, |
3190 | struct btrfs_root *root, u64 bytenr, u64 len); | 3243 | struct btrfs_root *root, u64 bytenr, u64 len); |
@@ -3249,6 +3302,8 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | |||
3249 | struct btrfs_root *root, | 3302 | struct btrfs_root *root, |
3250 | struct inode *dir, u64 objectid, | 3303 | struct inode *dir, u64 objectid, |
3251 | const char *name, int name_len); | 3304 | const char *name, int name_len); |
3305 | int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, | ||
3306 | int front); | ||
3252 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | 3307 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, |
3253 | struct btrfs_root *root, | 3308 | struct btrfs_root *root, |
3254 | struct inode *inode, u64 new_size, | 3309 | struct inode *inode, u64 new_size, |
@@ -3308,16 +3363,27 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); | |||
3308 | int btrfs_defrag_file(struct inode *inode, struct file *file, | 3363 | int btrfs_defrag_file(struct inode *inode, struct file *file, |
3309 | struct btrfs_ioctl_defrag_range_args *range, | 3364 | struct btrfs_ioctl_defrag_range_args *range, |
3310 | u64 newer_than, unsigned long max_pages); | 3365 | u64 newer_than, unsigned long max_pages); |
3366 | void btrfs_get_block_group_info(struct list_head *groups_list, | ||
3367 | struct btrfs_ioctl_space_info *space); | ||
3368 | |||
3311 | /* file.c */ | 3369 | /* file.c */ |
3312 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | 3370 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, |
3313 | struct inode *inode); | 3371 | struct inode *inode); |
3314 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); | 3372 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); |
3315 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); | 3373 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); |
3316 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 3374 | void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
3317 | int skip_pinned); | 3375 | int skip_pinned); |
3376 | int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace, | ||
3377 | u64 start, u64 end, int skip_pinned, | ||
3378 | int modified); | ||
3318 | extern const struct file_operations btrfs_file_operations; | 3379 | extern const struct file_operations btrfs_file_operations; |
3319 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, | 3380 | int __btrfs_drop_extents(struct btrfs_trans_handle *trans, |
3320 | u64 start, u64 end, u64 *hint_byte, int drop_cache); | 3381 | struct btrfs_root *root, struct inode *inode, |
3382 | struct btrfs_path *path, u64 start, u64 end, | ||
3383 | u64 *drop_end, int drop_cache); | ||
3384 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | ||
3385 | struct btrfs_root *root, struct inode *inode, u64 start, | ||
3386 | u64 end, int drop_cache); | ||
3321 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 3387 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
3322 | struct inode *inode, u64 start, u64 end); | 3388 | struct inode *inode, u64 start, u64 end); |
3323 | int btrfs_release_file(struct inode *inode, struct file *file); | 3389 | int btrfs_release_file(struct inode *inode, struct file *file); |
@@ -3378,6 +3444,11 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, | |||
3378 | } | 3444 | } |
3379 | } | 3445 | } |
3380 | 3446 | ||
3447 | /* | ||
3448 | * Call btrfs_abort_transaction as early as possible when an error condition is | ||
3449 | * detected, that way the exact line number is reported. | ||
3450 | */ | ||
3451 | |||
3381 | #define btrfs_abort_transaction(trans, root, errno) \ | 3452 | #define btrfs_abort_transaction(trans, root, errno) \ |
3382 | do { \ | 3453 | do { \ |
3383 | __btrfs_abort_transaction(trans, root, __func__, \ | 3454 | __btrfs_abort_transaction(trans, root, __func__, \ |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 52c85e2b95d0..478f66bdc57b 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -29,7 +29,7 @@ static struct kmem_cache *delayed_node_cache; | |||
29 | 29 | ||
30 | int __init btrfs_delayed_inode_init(void) | 30 | int __init btrfs_delayed_inode_init(void) |
31 | { | 31 | { |
32 | delayed_node_cache = kmem_cache_create("delayed_node", | 32 | delayed_node_cache = kmem_cache_create("btrfs_delayed_node", |
33 | sizeof(struct btrfs_delayed_node), | 33 | sizeof(struct btrfs_delayed_node), |
34 | 0, | 34 | 0, |
35 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | 35 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, |
@@ -650,7 +650,7 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
650 | * we're accounted for. | 650 | * we're accounted for. |
651 | */ | 651 | */ |
652 | if (!src_rsv || (!trans->bytes_reserved && | 652 | if (!src_rsv || (!trans->bytes_reserved && |
653 | src_rsv != &root->fs_info->delalloc_block_rsv)) { | 653 | src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { |
654 | ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); | 654 | ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); |
655 | /* | 655 | /* |
656 | * Since we're under a transaction reserve_metadata_bytes could | 656 | * Since we're under a transaction reserve_metadata_bytes could |
@@ -668,7 +668,7 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
668 | num_bytes, 1); | 668 | num_bytes, 1); |
669 | } | 669 | } |
670 | return ret; | 670 | return ret; |
671 | } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { | 671 | } else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) { |
672 | spin_lock(&BTRFS_I(inode)->lock); | 672 | spin_lock(&BTRFS_I(inode)->lock); |
673 | if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, | 673 | if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
674 | &BTRFS_I(inode)->runtime_flags)) { | 674 | &BTRFS_I(inode)->runtime_flags)) { |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 22e98e04c2ea..7cda51995c1e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -46,6 +46,10 @@ | |||
46 | #include "check-integrity.h" | 46 | #include "check-integrity.h" |
47 | #include "rcu-string.h" | 47 | #include "rcu-string.h" |
48 | 48 | ||
49 | #ifdef CONFIG_X86 | ||
50 | #include <asm/cpufeature.h> | ||
51 | #endif | ||
52 | |||
49 | static struct extent_io_ops btree_extent_io_ops; | 53 | static struct extent_io_ops btree_extent_io_ops; |
50 | static void end_workqueue_fn(struct btrfs_work *work); | 54 | static void end_workqueue_fn(struct btrfs_work *work); |
51 | static void free_fs_root(struct btrfs_root *root); | 55 | static void free_fs_root(struct btrfs_root *root); |
@@ -217,26 +221,16 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
217 | write_lock(&em_tree->lock); | 221 | write_lock(&em_tree->lock); |
218 | ret = add_extent_mapping(em_tree, em); | 222 | ret = add_extent_mapping(em_tree, em); |
219 | if (ret == -EEXIST) { | 223 | if (ret == -EEXIST) { |
220 | u64 failed_start = em->start; | ||
221 | u64 failed_len = em->len; | ||
222 | |||
223 | free_extent_map(em); | 224 | free_extent_map(em); |
224 | em = lookup_extent_mapping(em_tree, start, len); | 225 | em = lookup_extent_mapping(em_tree, start, len); |
225 | if (em) { | 226 | if (!em) |
226 | ret = 0; | 227 | em = ERR_PTR(-EIO); |
227 | } else { | ||
228 | em = lookup_extent_mapping(em_tree, failed_start, | ||
229 | failed_len); | ||
230 | ret = -EIO; | ||
231 | } | ||
232 | } else if (ret) { | 228 | } else if (ret) { |
233 | free_extent_map(em); | 229 | free_extent_map(em); |
234 | em = NULL; | 230 | em = ERR_PTR(ret); |
235 | } | 231 | } |
236 | write_unlock(&em_tree->lock); | 232 | write_unlock(&em_tree->lock); |
237 | 233 | ||
238 | if (ret) | ||
239 | em = ERR_PTR(ret); | ||
240 | out: | 234 | out: |
241 | return em; | 235 | return em; |
242 | } | 236 | } |
@@ -439,10 +433,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
439 | WARN_ON(1); | 433 | WARN_ON(1); |
440 | return 0; | 434 | return 0; |
441 | } | 435 | } |
442 | if (eb->pages[0] != page) { | ||
443 | WARN_ON(1); | ||
444 | return 0; | ||
445 | } | ||
446 | if (!PageUptodate(page)) { | 436 | if (!PageUptodate(page)) { |
447 | WARN_ON(1); | 437 | WARN_ON(1); |
448 | return 0; | 438 | return 0; |
@@ -869,10 +859,22 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
869 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); | 859 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); |
870 | } | 860 | } |
871 | 861 | ||
862 | static int check_async_write(struct inode *inode, unsigned long bio_flags) | ||
863 | { | ||
864 | if (bio_flags & EXTENT_BIO_TREE_LOG) | ||
865 | return 0; | ||
866 | #ifdef CONFIG_X86 | ||
867 | if (cpu_has_xmm4_2) | ||
868 | return 0; | ||
869 | #endif | ||
870 | return 1; | ||
871 | } | ||
872 | |||
872 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 873 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
873 | int mirror_num, unsigned long bio_flags, | 874 | int mirror_num, unsigned long bio_flags, |
874 | u64 bio_offset) | 875 | u64 bio_offset) |
875 | { | 876 | { |
877 | int async = check_async_write(inode, bio_flags); | ||
876 | int ret; | 878 | int ret; |
877 | 879 | ||
878 | if (!(rw & REQ_WRITE)) { | 880 | if (!(rw & REQ_WRITE)) { |
@@ -887,6 +889,12 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
887 | return ret; | 889 | return ret; |
888 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 890 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
889 | mirror_num, 0); | 891 | mirror_num, 0); |
892 | } else if (!async) { | ||
893 | ret = btree_csum_one_bio(bio); | ||
894 | if (ret) | ||
895 | return ret; | ||
896 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | ||
897 | mirror_num, 0); | ||
890 | } | 898 | } |
891 | 899 | ||
892 | /* | 900 | /* |
@@ -1168,8 +1176,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1168 | atomic_set(&root->log_commit[0], 0); | 1176 | atomic_set(&root->log_commit[0], 0); |
1169 | atomic_set(&root->log_commit[1], 0); | 1177 | atomic_set(&root->log_commit[1], 0); |
1170 | atomic_set(&root->log_writers, 0); | 1178 | atomic_set(&root->log_writers, 0); |
1179 | atomic_set(&root->log_batch, 0); | ||
1171 | atomic_set(&root->orphan_inodes, 0); | 1180 | atomic_set(&root->orphan_inodes, 0); |
1172 | root->log_batch = 0; | ||
1173 | root->log_transid = 0; | 1181 | root->log_transid = 0; |
1174 | root->last_log_commit = 0; | 1182 | root->last_log_commit = 0; |
1175 | extent_io_tree_init(&root->dirty_log_pages, | 1183 | extent_io_tree_init(&root->dirty_log_pages, |
@@ -1667,9 +1675,10 @@ static int transaction_kthread(void *arg) | |||
1667 | spin_unlock(&root->fs_info->trans_lock); | 1675 | spin_unlock(&root->fs_info->trans_lock); |
1668 | 1676 | ||
1669 | /* If the file system is aborted, this will always fail. */ | 1677 | /* If the file system is aborted, this will always fail. */ |
1670 | trans = btrfs_join_transaction(root); | 1678 | trans = btrfs_attach_transaction(root); |
1671 | if (IS_ERR(trans)) { | 1679 | if (IS_ERR(trans)) { |
1672 | cannot_commit = true; | 1680 | if (PTR_ERR(trans) != -ENOENT) |
1681 | cannot_commit = true; | ||
1673 | goto sleep; | 1682 | goto sleep; |
1674 | } | 1683 | } |
1675 | if (transid == trans->transid) { | 1684 | if (transid == trans->transid) { |
@@ -1994,13 +2003,11 @@ int open_ctree(struct super_block *sb, | |||
1994 | INIT_LIST_HEAD(&fs_info->trans_list); | 2003 | INIT_LIST_HEAD(&fs_info->trans_list); |
1995 | INIT_LIST_HEAD(&fs_info->dead_roots); | 2004 | INIT_LIST_HEAD(&fs_info->dead_roots); |
1996 | INIT_LIST_HEAD(&fs_info->delayed_iputs); | 2005 | INIT_LIST_HEAD(&fs_info->delayed_iputs); |
1997 | INIT_LIST_HEAD(&fs_info->hashers); | ||
1998 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 2006 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
1999 | INIT_LIST_HEAD(&fs_info->ordered_operations); | 2007 | INIT_LIST_HEAD(&fs_info->ordered_operations); |
2000 | INIT_LIST_HEAD(&fs_info->caching_block_groups); | 2008 | INIT_LIST_HEAD(&fs_info->caching_block_groups); |
2001 | spin_lock_init(&fs_info->delalloc_lock); | 2009 | spin_lock_init(&fs_info->delalloc_lock); |
2002 | spin_lock_init(&fs_info->trans_lock); | 2010 | spin_lock_init(&fs_info->trans_lock); |
2003 | spin_lock_init(&fs_info->ref_cache_lock); | ||
2004 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 2011 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
2005 | spin_lock_init(&fs_info->delayed_iput_lock); | 2012 | spin_lock_init(&fs_info->delayed_iput_lock); |
2006 | spin_lock_init(&fs_info->defrag_inodes_lock); | 2013 | spin_lock_init(&fs_info->defrag_inodes_lock); |
@@ -2014,12 +2021,15 @@ int open_ctree(struct super_block *sb, | |||
2014 | INIT_LIST_HEAD(&fs_info->space_info); | 2021 | INIT_LIST_HEAD(&fs_info->space_info); |
2015 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); | 2022 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); |
2016 | btrfs_mapping_init(&fs_info->mapping_tree); | 2023 | btrfs_mapping_init(&fs_info->mapping_tree); |
2017 | btrfs_init_block_rsv(&fs_info->global_block_rsv); | 2024 | btrfs_init_block_rsv(&fs_info->global_block_rsv, |
2018 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); | 2025 | BTRFS_BLOCK_RSV_GLOBAL); |
2019 | btrfs_init_block_rsv(&fs_info->trans_block_rsv); | 2026 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv, |
2020 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv); | 2027 | BTRFS_BLOCK_RSV_DELALLOC); |
2021 | btrfs_init_block_rsv(&fs_info->empty_block_rsv); | 2028 | btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS); |
2022 | btrfs_init_block_rsv(&fs_info->delayed_block_rsv); | 2029 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK); |
2030 | btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY); | ||
2031 | btrfs_init_block_rsv(&fs_info->delayed_block_rsv, | ||
2032 | BTRFS_BLOCK_RSV_DELOPS); | ||
2023 | atomic_set(&fs_info->nr_async_submits, 0); | 2033 | atomic_set(&fs_info->nr_async_submits, 0); |
2024 | atomic_set(&fs_info->async_delalloc_pages, 0); | 2034 | atomic_set(&fs_info->async_delalloc_pages, 0); |
2025 | atomic_set(&fs_info->async_submit_draining, 0); | 2035 | atomic_set(&fs_info->async_submit_draining, 0); |
@@ -2491,6 +2501,8 @@ retry_root_backup: | |||
2491 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); | 2501 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); |
2492 | goto fail_block_groups; | 2502 | goto fail_block_groups; |
2493 | } | 2503 | } |
2504 | fs_info->num_tolerated_disk_barrier_failures = | ||
2505 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); | ||
2494 | 2506 | ||
2495 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | 2507 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, |
2496 | "btrfs-cleaner"); | 2508 | "btrfs-cleaner"); |
@@ -2874,12 +2886,10 @@ static int write_dev_flush(struct btrfs_device *device, int wait) | |||
2874 | printk_in_rcu("btrfs: disabling barriers on dev %s\n", | 2886 | printk_in_rcu("btrfs: disabling barriers on dev %s\n", |
2875 | rcu_str_deref(device->name)); | 2887 | rcu_str_deref(device->name)); |
2876 | device->nobarriers = 1; | 2888 | device->nobarriers = 1; |
2877 | } | 2889 | } else if (!bio_flagged(bio, BIO_UPTODATE)) { |
2878 | if (!bio_flagged(bio, BIO_UPTODATE)) { | ||
2879 | ret = -EIO; | 2890 | ret = -EIO; |
2880 | if (!bio_flagged(bio, BIO_EOPNOTSUPP)) | 2891 | btrfs_dev_stat_inc_and_print(device, |
2881 | btrfs_dev_stat_inc_and_print(device, | 2892 | BTRFS_DEV_STAT_FLUSH_ERRS); |
2882 | BTRFS_DEV_STAT_FLUSH_ERRS); | ||
2883 | } | 2893 | } |
2884 | 2894 | ||
2885 | /* drop the reference from the wait == 0 run */ | 2895 | /* drop the reference from the wait == 0 run */ |
@@ -2918,14 +2928,15 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
2918 | { | 2928 | { |
2919 | struct list_head *head; | 2929 | struct list_head *head; |
2920 | struct btrfs_device *dev; | 2930 | struct btrfs_device *dev; |
2921 | int errors = 0; | 2931 | int errors_send = 0; |
2932 | int errors_wait = 0; | ||
2922 | int ret; | 2933 | int ret; |
2923 | 2934 | ||
2924 | /* send down all the barriers */ | 2935 | /* send down all the barriers */ |
2925 | head = &info->fs_devices->devices; | 2936 | head = &info->fs_devices->devices; |
2926 | list_for_each_entry_rcu(dev, head, dev_list) { | 2937 | list_for_each_entry_rcu(dev, head, dev_list) { |
2927 | if (!dev->bdev) { | 2938 | if (!dev->bdev) { |
2928 | errors++; | 2939 | errors_send++; |
2929 | continue; | 2940 | continue; |
2930 | } | 2941 | } |
2931 | if (!dev->in_fs_metadata || !dev->writeable) | 2942 | if (!dev->in_fs_metadata || !dev->writeable) |
@@ -2933,13 +2944,13 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
2933 | 2944 | ||
2934 | ret = write_dev_flush(dev, 0); | 2945 | ret = write_dev_flush(dev, 0); |
2935 | if (ret) | 2946 | if (ret) |
2936 | errors++; | 2947 | errors_send++; |
2937 | } | 2948 | } |
2938 | 2949 | ||
2939 | /* wait for all the barriers */ | 2950 | /* wait for all the barriers */ |
2940 | list_for_each_entry_rcu(dev, head, dev_list) { | 2951 | list_for_each_entry_rcu(dev, head, dev_list) { |
2941 | if (!dev->bdev) { | 2952 | if (!dev->bdev) { |
2942 | errors++; | 2953 | errors_wait++; |
2943 | continue; | 2954 | continue; |
2944 | } | 2955 | } |
2945 | if (!dev->in_fs_metadata || !dev->writeable) | 2956 | if (!dev->in_fs_metadata || !dev->writeable) |
@@ -2947,13 +2958,87 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
2947 | 2958 | ||
2948 | ret = write_dev_flush(dev, 1); | 2959 | ret = write_dev_flush(dev, 1); |
2949 | if (ret) | 2960 | if (ret) |
2950 | errors++; | 2961 | errors_wait++; |
2951 | } | 2962 | } |
2952 | if (errors) | 2963 | if (errors_send > info->num_tolerated_disk_barrier_failures || |
2964 | errors_wait > info->num_tolerated_disk_barrier_failures) | ||
2953 | return -EIO; | 2965 | return -EIO; |
2954 | return 0; | 2966 | return 0; |
2955 | } | 2967 | } |
2956 | 2968 | ||
2969 | int btrfs_calc_num_tolerated_disk_barrier_failures( | ||
2970 | struct btrfs_fs_info *fs_info) | ||
2971 | { | ||
2972 | struct btrfs_ioctl_space_info space; | ||
2973 | struct btrfs_space_info *sinfo; | ||
2974 | u64 types[] = {BTRFS_BLOCK_GROUP_DATA, | ||
2975 | BTRFS_BLOCK_GROUP_SYSTEM, | ||
2976 | BTRFS_BLOCK_GROUP_METADATA, | ||
2977 | BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; | ||
2978 | int num_types = 4; | ||
2979 | int i; | ||
2980 | int c; | ||
2981 | int num_tolerated_disk_barrier_failures = | ||
2982 | (int)fs_info->fs_devices->num_devices; | ||
2983 | |||
2984 | for (i = 0; i < num_types; i++) { | ||
2985 | struct btrfs_space_info *tmp; | ||
2986 | |||
2987 | sinfo = NULL; | ||
2988 | rcu_read_lock(); | ||
2989 | list_for_each_entry_rcu(tmp, &fs_info->space_info, list) { | ||
2990 | if (tmp->flags == types[i]) { | ||
2991 | sinfo = tmp; | ||
2992 | break; | ||
2993 | } | ||
2994 | } | ||
2995 | rcu_read_unlock(); | ||
2996 | |||
2997 | if (!sinfo) | ||
2998 | continue; | ||
2999 | |||
3000 | down_read(&sinfo->groups_sem); | ||
3001 | for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { | ||
3002 | if (!list_empty(&sinfo->block_groups[c])) { | ||
3003 | u64 flags; | ||
3004 | |||
3005 | btrfs_get_block_group_info( | ||
3006 | &sinfo->block_groups[c], &space); | ||
3007 | if (space.total_bytes == 0 || | ||
3008 | space.used_bytes == 0) | ||
3009 | continue; | ||
3010 | flags = space.flags; | ||
3011 | /* | ||
3012 | * return | ||
3013 | * 0: if dup, single or RAID0 is configured for | ||
3014 | * any of metadata, system or data, else | ||
3015 | * 1: if RAID5 is configured, or if RAID1 or | ||
3016 | * RAID10 is configured and only two mirrors | ||
3017 | * are used, else | ||
3018 | * 2: if RAID6 is configured, else | ||
3019 | * num_mirrors - 1: if RAID1 or RAID10 is | ||
3020 | * configured and more than | ||
3021 | * 2 mirrors are used. | ||
3022 | */ | ||
3023 | if (num_tolerated_disk_barrier_failures > 0 && | ||
3024 | ((flags & (BTRFS_BLOCK_GROUP_DUP | | ||
3025 | BTRFS_BLOCK_GROUP_RAID0)) || | ||
3026 | ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) | ||
3027 | == 0))) | ||
3028 | num_tolerated_disk_barrier_failures = 0; | ||
3029 | else if (num_tolerated_disk_barrier_failures > 1 | ||
3030 | && | ||
3031 | (flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
3032 | BTRFS_BLOCK_GROUP_RAID10))) | ||
3033 | num_tolerated_disk_barrier_failures = 1; | ||
3034 | } | ||
3035 | } | ||
3036 | up_read(&sinfo->groups_sem); | ||
3037 | } | ||
3038 | |||
3039 | return num_tolerated_disk_barrier_failures; | ||
3040 | } | ||
3041 | |||
2957 | int write_all_supers(struct btrfs_root *root, int max_mirrors) | 3042 | int write_all_supers(struct btrfs_root *root, int max_mirrors) |
2958 | { | 3043 | { |
2959 | struct list_head *head; | 3044 | struct list_head *head; |
@@ -2976,8 +3061,16 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2976 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 3061 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
2977 | head = &root->fs_info->fs_devices->devices; | 3062 | head = &root->fs_info->fs_devices->devices; |
2978 | 3063 | ||
2979 | if (do_barriers) | 3064 | if (do_barriers) { |
2980 | barrier_all_devices(root->fs_info); | 3065 | ret = barrier_all_devices(root->fs_info); |
3066 | if (ret) { | ||
3067 | mutex_unlock( | ||
3068 | &root->fs_info->fs_devices->device_list_mutex); | ||
3069 | btrfs_error(root->fs_info, ret, | ||
3070 | "errors while submitting device barriers."); | ||
3071 | return ret; | ||
3072 | } | ||
3073 | } | ||
2981 | 3074 | ||
2982 | list_for_each_entry_rcu(dev, head, dev_list) { | 3075 | list_for_each_entry_rcu(dev, head, dev_list) { |
2983 | if (!dev->bdev) { | 3076 | if (!dev->bdev) { |
@@ -3211,10 +3304,6 @@ int close_ctree(struct btrfs_root *root) | |||
3211 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", | 3304 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", |
3212 | (unsigned long long)fs_info->delalloc_bytes); | 3305 | (unsigned long long)fs_info->delalloc_bytes); |
3213 | } | 3306 | } |
3214 | if (fs_info->total_ref_cache_size) { | ||
3215 | printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", | ||
3216 | (unsigned long long)fs_info->total_ref_cache_size); | ||
3217 | } | ||
3218 | 3307 | ||
3219 | free_extent_buffer(fs_info->extent_root->node); | 3308 | free_extent_buffer(fs_info->extent_root->node); |
3220 | free_extent_buffer(fs_info->extent_root->commit_root); | 3309 | free_extent_buffer(fs_info->extent_root->commit_root); |
@@ -3360,52 +3449,6 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
3360 | return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | 3449 | return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); |
3361 | } | 3450 | } |
3362 | 3451 | ||
3363 | int btree_lock_page_hook(struct page *page, void *data, | ||
3364 | void (*flush_fn)(void *)) | ||
3365 | { | ||
3366 | struct inode *inode = page->mapping->host; | ||
3367 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3368 | struct extent_buffer *eb; | ||
3369 | |||
3370 | /* | ||
3371 | * We culled this eb but the page is still hanging out on the mapping, | ||
3372 | * carry on. | ||
3373 | */ | ||
3374 | if (!PagePrivate(page)) | ||
3375 | goto out; | ||
3376 | |||
3377 | eb = (struct extent_buffer *)page->private; | ||
3378 | if (!eb) { | ||
3379 | WARN_ON(1); | ||
3380 | goto out; | ||
3381 | } | ||
3382 | if (page != eb->pages[0]) | ||
3383 | goto out; | ||
3384 | |||
3385 | if (!btrfs_try_tree_write_lock(eb)) { | ||
3386 | flush_fn(data); | ||
3387 | btrfs_tree_lock(eb); | ||
3388 | } | ||
3389 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | ||
3390 | |||
3391 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | ||
3392 | spin_lock(&root->fs_info->delalloc_lock); | ||
3393 | if (root->fs_info->dirty_metadata_bytes >= eb->len) | ||
3394 | root->fs_info->dirty_metadata_bytes -= eb->len; | ||
3395 | else | ||
3396 | WARN_ON(1); | ||
3397 | spin_unlock(&root->fs_info->delalloc_lock); | ||
3398 | } | ||
3399 | |||
3400 | btrfs_tree_unlock(eb); | ||
3401 | out: | ||
3402 | if (!trylock_page(page)) { | ||
3403 | flush_fn(data); | ||
3404 | lock_page(page); | ||
3405 | } | ||
3406 | return 0; | ||
3407 | } | ||
3408 | |||
3409 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | 3452 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, |
3410 | int read_only) | 3453 | int read_only) |
3411 | { | 3454 | { |
@@ -3608,7 +3651,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, | |||
3608 | 3651 | ||
3609 | while (1) { | 3652 | while (1) { |
3610 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | 3653 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, |
3611 | mark); | 3654 | mark, NULL); |
3612 | if (ret) | 3655 | if (ret) |
3613 | break; | 3656 | break; |
3614 | 3657 | ||
@@ -3663,7 +3706,7 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | |||
3663 | again: | 3706 | again: |
3664 | while (1) { | 3707 | while (1) { |
3665 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 3708 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
3666 | EXTENT_DIRTY); | 3709 | EXTENT_DIRTY, NULL); |
3667 | if (ret) | 3710 | if (ret) |
3668 | break; | 3711 | break; |
3669 | 3712 | ||
@@ -3800,7 +3843,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3800 | } | 3843 | } |
3801 | 3844 | ||
3802 | static struct extent_io_ops btree_extent_io_ops = { | 3845 | static struct extent_io_ops btree_extent_io_ops = { |
3803 | .write_cache_pages_lock_hook = btree_lock_page_hook, | ||
3804 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3846 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
3805 | .readpage_io_failed_hook = btree_io_failed_hook, | 3847 | .readpage_io_failed_hook = btree_io_failed_hook, |
3806 | .submit_bio_hook = btree_submit_bio_hook, | 3848 | .submit_bio_hook = btree_submit_bio_hook, |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c5b00a735fef..2025a9132c16 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -95,6 +95,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | |||
95 | u64 objectid); | 95 | u64 objectid); |
96 | int btree_lock_page_hook(struct page *page, void *data, | 96 | int btree_lock_page_hook(struct page *page, void *data, |
97 | void (*flush_fn)(void *)); | 97 | void (*flush_fn)(void *)); |
98 | int btrfs_calc_num_tolerated_disk_barrier_failures( | ||
99 | struct btrfs_fs_info *fs_info); | ||
98 | 100 | ||
99 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 101 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
100 | void btrfs_init_lockdep(void); | 102 | void btrfs_init_lockdep(void); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ba58024d40d3..3d3e2c17d8d1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -94,8 +94,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
94 | u64 flags, struct btrfs_disk_key *key, | 94 | u64 flags, struct btrfs_disk_key *key, |
95 | int level, struct btrfs_key *ins); | 95 | int level, struct btrfs_key *ins); |
96 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 96 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
97 | struct btrfs_root *extent_root, u64 alloc_bytes, | 97 | struct btrfs_root *extent_root, u64 flags, |
98 | u64 flags, int force); | 98 | int force); |
99 | static int find_next_key(struct btrfs_path *path, int level, | 99 | static int find_next_key(struct btrfs_path *path, int level, |
100 | struct btrfs_key *key); | 100 | struct btrfs_key *key); |
101 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | 101 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
@@ -312,7 +312,8 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
312 | while (start < end) { | 312 | while (start < end) { |
313 | ret = find_first_extent_bit(info->pinned_extents, start, | 313 | ret = find_first_extent_bit(info->pinned_extents, start, |
314 | &extent_start, &extent_end, | 314 | &extent_start, &extent_end, |
315 | EXTENT_DIRTY | EXTENT_UPTODATE); | 315 | EXTENT_DIRTY | EXTENT_UPTODATE, |
316 | NULL); | ||
316 | if (ret) | 317 | if (ret) |
317 | break; | 318 | break; |
318 | 319 | ||
@@ -2361,10 +2362,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2361 | } | 2362 | } |
2362 | 2363 | ||
2363 | next: | 2364 | next: |
2364 | do_chunk_alloc(trans, fs_info->extent_root, | ||
2365 | 2 * 1024 * 1024, | ||
2366 | btrfs_get_alloc_profile(root, 0), | ||
2367 | CHUNK_ALLOC_NO_FORCE); | ||
2368 | cond_resched(); | 2365 | cond_resched(); |
2369 | spin_lock(&delayed_refs->lock); | 2366 | spin_lock(&delayed_refs->lock); |
2370 | } | 2367 | } |
@@ -2478,10 +2475,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2478 | if (root == root->fs_info->extent_root) | 2475 | if (root == root->fs_info->extent_root) |
2479 | root = root->fs_info->tree_root; | 2476 | root = root->fs_info->tree_root; |
2480 | 2477 | ||
2481 | do_chunk_alloc(trans, root->fs_info->extent_root, | ||
2482 | 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), | ||
2483 | CHUNK_ALLOC_NO_FORCE); | ||
2484 | |||
2485 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | 2478 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); |
2486 | 2479 | ||
2487 | delayed_refs = &trans->transaction->delayed_refs; | 2480 | delayed_refs = &trans->transaction->delayed_refs; |
@@ -2551,6 +2544,12 @@ again: | |||
2551 | } | 2544 | } |
2552 | 2545 | ||
2553 | if (run_all) { | 2546 | if (run_all) { |
2547 | if (!list_empty(&trans->new_bgs)) { | ||
2548 | spin_unlock(&delayed_refs->lock); | ||
2549 | btrfs_create_pending_block_groups(trans, root); | ||
2550 | spin_lock(&delayed_refs->lock); | ||
2551 | } | ||
2552 | |||
2554 | node = rb_first(&delayed_refs->root); | 2553 | node = rb_first(&delayed_refs->root); |
2555 | if (!node) | 2554 | if (!node) |
2556 | goto out; | 2555 | goto out; |
@@ -3406,7 +3405,6 @@ alloc: | |||
3406 | return PTR_ERR(trans); | 3405 | return PTR_ERR(trans); |
3407 | 3406 | ||
3408 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 3407 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
3409 | bytes + 2 * 1024 * 1024, | ||
3410 | alloc_target, | 3408 | alloc_target, |
3411 | CHUNK_ALLOC_NO_FORCE); | 3409 | CHUNK_ALLOC_NO_FORCE); |
3412 | btrfs_end_transaction(trans, root); | 3410 | btrfs_end_transaction(trans, root); |
@@ -3488,8 +3486,7 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) | |||
3488 | } | 3486 | } |
3489 | 3487 | ||
3490 | static int should_alloc_chunk(struct btrfs_root *root, | 3488 | static int should_alloc_chunk(struct btrfs_root *root, |
3491 | struct btrfs_space_info *sinfo, u64 alloc_bytes, | 3489 | struct btrfs_space_info *sinfo, int force) |
3492 | int force) | ||
3493 | { | 3490 | { |
3494 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | 3491 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; |
3495 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; | 3492 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; |
@@ -3504,7 +3501,8 @@ static int should_alloc_chunk(struct btrfs_root *root, | |||
3504 | * and purposes it's used space. Don't worry about locking the | 3501 | * and purposes it's used space. Don't worry about locking the |
3505 | * global_rsv, it doesn't change except when the transaction commits. | 3502 | * global_rsv, it doesn't change except when the transaction commits. |
3506 | */ | 3503 | */ |
3507 | num_allocated += global_rsv->size; | 3504 | if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA) |
3505 | num_allocated += global_rsv->size; | ||
3508 | 3506 | ||
3509 | /* | 3507 | /* |
3510 | * in limited mode, we want to have some free space up to | 3508 | * in limited mode, we want to have some free space up to |
@@ -3518,15 +3516,8 @@ static int should_alloc_chunk(struct btrfs_root *root, | |||
3518 | if (num_bytes - num_allocated < thresh) | 3516 | if (num_bytes - num_allocated < thresh) |
3519 | return 1; | 3517 | return 1; |
3520 | } | 3518 | } |
3521 | thresh = btrfs_super_total_bytes(root->fs_info->super_copy); | ||
3522 | 3519 | ||
3523 | /* 256MB or 2% of the FS */ | 3520 | if (num_allocated + 2 * 1024 * 1024 < div_factor(num_bytes, 8)) |
3524 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 2)); | ||
3525 | /* system chunks need a much small threshold */ | ||
3526 | if (sinfo->flags & BTRFS_BLOCK_GROUP_SYSTEM) | ||
3527 | thresh = 32 * 1024 * 1024; | ||
3528 | |||
3529 | if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 8)) | ||
3530 | return 0; | 3521 | return 0; |
3531 | return 1; | 3522 | return 1; |
3532 | } | 3523 | } |
@@ -3576,8 +3567,7 @@ static void check_system_chunk(struct btrfs_trans_handle *trans, | |||
3576 | } | 3567 | } |
3577 | 3568 | ||
3578 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 3569 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
3579 | struct btrfs_root *extent_root, u64 alloc_bytes, | 3570 | struct btrfs_root *extent_root, u64 flags, int force) |
3580 | u64 flags, int force) | ||
3581 | { | 3571 | { |
3582 | struct btrfs_space_info *space_info; | 3572 | struct btrfs_space_info *space_info; |
3583 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3573 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
@@ -3601,7 +3591,7 @@ again: | |||
3601 | return 0; | 3591 | return 0; |
3602 | } | 3592 | } |
3603 | 3593 | ||
3604 | if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { | 3594 | if (!should_alloc_chunk(extent_root, space_info, force)) { |
3605 | spin_unlock(&space_info->lock); | 3595 | spin_unlock(&space_info->lock); |
3606 | return 0; | 3596 | return 0; |
3607 | } else if (space_info->chunk_alloc) { | 3597 | } else if (space_info->chunk_alloc) { |
@@ -3669,6 +3659,46 @@ out: | |||
3669 | return ret; | 3659 | return ret; |
3670 | } | 3660 | } |
3671 | 3661 | ||
3662 | static int can_overcommit(struct btrfs_root *root, | ||
3663 | struct btrfs_space_info *space_info, u64 bytes, | ||
3664 | int flush) | ||
3665 | { | ||
3666 | u64 profile = btrfs_get_alloc_profile(root, 0); | ||
3667 | u64 avail; | ||
3668 | u64 used; | ||
3669 | |||
3670 | used = space_info->bytes_used + space_info->bytes_reserved + | ||
3671 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
3672 | space_info->bytes_may_use; | ||
3673 | |||
3674 | spin_lock(&root->fs_info->free_chunk_lock); | ||
3675 | avail = root->fs_info->free_chunk_space; | ||
3676 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
3677 | |||
3678 | /* | ||
3679 | * If we have dup, raid1 or raid10 then only half of the free | ||
3680 | * space is actually useable. | ||
3681 | */ | ||
3682 | if (profile & (BTRFS_BLOCK_GROUP_DUP | | ||
3683 | BTRFS_BLOCK_GROUP_RAID1 | | ||
3684 | BTRFS_BLOCK_GROUP_RAID10)) | ||
3685 | avail >>= 1; | ||
3686 | |||
3687 | /* | ||
3688 | * If we aren't flushing, let us overcommit up to 1/2 of the | ||
3689 | * space. If we can flush, don't let us overcommit too much, | ||
3690 | * say 1/8th of the space. | ||
3691 | */ | ||
3692 | if (flush) | ||
3693 | avail >>= 3; | ||
3694 | else | ||
3695 | avail >>= 1; | ||
3696 | |||
3697 | if (used + bytes < space_info->total_bytes + avail) | ||
3698 | return 1; | ||
3699 | return 0; | ||
3700 | } | ||
3701 | |||
3672 | /* | 3702 | /* |
3673 | * shrink metadata reservation for delalloc | 3703 | * shrink metadata reservation for delalloc |
3674 | */ | 3704 | */ |
@@ -3693,7 +3723,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3693 | if (delalloc_bytes == 0) { | 3723 | if (delalloc_bytes == 0) { |
3694 | if (trans) | 3724 | if (trans) |
3695 | return; | 3725 | return; |
3696 | btrfs_wait_ordered_extents(root, 0, 0); | 3726 | btrfs_wait_ordered_extents(root, 0); |
3697 | return; | 3727 | return; |
3698 | } | 3728 | } |
3699 | 3729 | ||
@@ -3703,11 +3733,15 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3703 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages, | 3733 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages, |
3704 | WB_REASON_FS_FREE_SPACE); | 3734 | WB_REASON_FS_FREE_SPACE); |
3705 | 3735 | ||
3736 | /* | ||
3737 | * We need to wait for the async pages to actually start before | ||
3738 | * we do anything. | ||
3739 | */ | ||
3740 | wait_event(root->fs_info->async_submit_wait, | ||
3741 | !atomic_read(&root->fs_info->async_delalloc_pages)); | ||
3742 | |||
3706 | spin_lock(&space_info->lock); | 3743 | spin_lock(&space_info->lock); |
3707 | if (space_info->bytes_used + space_info->bytes_reserved + | 3744 | if (can_overcommit(root, space_info, orig, !trans)) { |
3708 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
3709 | space_info->bytes_may_use + orig <= | ||
3710 | space_info->total_bytes) { | ||
3711 | spin_unlock(&space_info->lock); | 3745 | spin_unlock(&space_info->lock); |
3712 | break; | 3746 | break; |
3713 | } | 3747 | } |
@@ -3715,7 +3749,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3715 | 3749 | ||
3716 | loops++; | 3750 | loops++; |
3717 | if (wait_ordered && !trans) { | 3751 | if (wait_ordered && !trans) { |
3718 | btrfs_wait_ordered_extents(root, 0, 0); | 3752 | btrfs_wait_ordered_extents(root, 0); |
3719 | } else { | 3753 | } else { |
3720 | time_left = schedule_timeout_killable(1); | 3754 | time_left = schedule_timeout_killable(1); |
3721 | if (time_left) | 3755 | if (time_left) |
@@ -3784,11 +3818,12 @@ commit: | |||
3784 | } | 3818 | } |
3785 | 3819 | ||
3786 | enum flush_state { | 3820 | enum flush_state { |
3787 | FLUSH_DELALLOC = 1, | 3821 | FLUSH_DELAYED_ITEMS_NR = 1, |
3788 | FLUSH_DELALLOC_WAIT = 2, | 3822 | FLUSH_DELAYED_ITEMS = 2, |
3789 | FLUSH_DELAYED_ITEMS_NR = 3, | 3823 | FLUSH_DELALLOC = 3, |
3790 | FLUSH_DELAYED_ITEMS = 4, | 3824 | FLUSH_DELALLOC_WAIT = 4, |
3791 | COMMIT_TRANS = 5, | 3825 | ALLOC_CHUNK = 5, |
3826 | COMMIT_TRANS = 6, | ||
3792 | }; | 3827 | }; |
3793 | 3828 | ||
3794 | static int flush_space(struct btrfs_root *root, | 3829 | static int flush_space(struct btrfs_root *root, |
@@ -3800,11 +3835,6 @@ static int flush_space(struct btrfs_root *root, | |||
3800 | int ret = 0; | 3835 | int ret = 0; |
3801 | 3836 | ||
3802 | switch (state) { | 3837 | switch (state) { |
3803 | case FLUSH_DELALLOC: | ||
3804 | case FLUSH_DELALLOC_WAIT: | ||
3805 | shrink_delalloc(root, num_bytes, orig_bytes, | ||
3806 | state == FLUSH_DELALLOC_WAIT); | ||
3807 | break; | ||
3808 | case FLUSH_DELAYED_ITEMS_NR: | 3838 | case FLUSH_DELAYED_ITEMS_NR: |
3809 | case FLUSH_DELAYED_ITEMS: | 3839 | case FLUSH_DELAYED_ITEMS: |
3810 | if (state == FLUSH_DELAYED_ITEMS_NR) { | 3840 | if (state == FLUSH_DELAYED_ITEMS_NR) { |
@@ -3825,6 +3855,24 @@ static int flush_space(struct btrfs_root *root, | |||
3825 | ret = btrfs_run_delayed_items_nr(trans, root, nr); | 3855 | ret = btrfs_run_delayed_items_nr(trans, root, nr); |
3826 | btrfs_end_transaction(trans, root); | 3856 | btrfs_end_transaction(trans, root); |
3827 | break; | 3857 | break; |
3858 | case FLUSH_DELALLOC: | ||
3859 | case FLUSH_DELALLOC_WAIT: | ||
3860 | shrink_delalloc(root, num_bytes, orig_bytes, | ||
3861 | state == FLUSH_DELALLOC_WAIT); | ||
3862 | break; | ||
3863 | case ALLOC_CHUNK: | ||
3864 | trans = btrfs_join_transaction(root); | ||
3865 | if (IS_ERR(trans)) { | ||
3866 | ret = PTR_ERR(trans); | ||
3867 | break; | ||
3868 | } | ||
3869 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
3870 | btrfs_get_alloc_profile(root, 0), | ||
3871 | CHUNK_ALLOC_NO_FORCE); | ||
3872 | btrfs_end_transaction(trans, root); | ||
3873 | if (ret == -ENOSPC) | ||
3874 | ret = 0; | ||
3875 | break; | ||
3828 | case COMMIT_TRANS: | 3876 | case COMMIT_TRANS: |
3829 | ret = may_commit_transaction(root, space_info, orig_bytes, 0); | 3877 | ret = may_commit_transaction(root, space_info, orig_bytes, 0); |
3830 | break; | 3878 | break; |
@@ -3856,10 +3904,9 @@ static int reserve_metadata_bytes(struct btrfs_root *root, | |||
3856 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3904 | struct btrfs_space_info *space_info = block_rsv->space_info; |
3857 | u64 used; | 3905 | u64 used; |
3858 | u64 num_bytes = orig_bytes; | 3906 | u64 num_bytes = orig_bytes; |
3859 | int flush_state = FLUSH_DELALLOC; | 3907 | int flush_state = FLUSH_DELAYED_ITEMS_NR; |
3860 | int ret = 0; | 3908 | int ret = 0; |
3861 | bool flushing = false; | 3909 | bool flushing = false; |
3862 | bool committed = false; | ||
3863 | 3910 | ||
3864 | again: | 3911 | again: |
3865 | ret = 0; | 3912 | ret = 0; |
@@ -3922,57 +3969,12 @@ again: | |||
3922 | (orig_bytes * 2); | 3969 | (orig_bytes * 2); |
3923 | } | 3970 | } |
3924 | 3971 | ||
3925 | if (ret) { | 3972 | if (ret && can_overcommit(root, space_info, orig_bytes, flush)) { |
3926 | u64 profile = btrfs_get_alloc_profile(root, 0); | 3973 | space_info->bytes_may_use += orig_bytes; |
3927 | u64 avail; | 3974 | trace_btrfs_space_reservation(root->fs_info, "space_info", |
3928 | 3975 | space_info->flags, orig_bytes, | |
3929 | /* | 3976 | 1); |
3930 | * If we have a lot of space that's pinned, don't bother doing | 3977 | ret = 0; |
3931 | * the overcommit dance yet and just commit the transaction. | ||
3932 | */ | ||
3933 | avail = (space_info->total_bytes - space_info->bytes_used) * 8; | ||
3934 | do_div(avail, 10); | ||
3935 | if (space_info->bytes_pinned >= avail && flush && !committed) { | ||
3936 | space_info->flush = 1; | ||
3937 | flushing = true; | ||
3938 | spin_unlock(&space_info->lock); | ||
3939 | ret = may_commit_transaction(root, space_info, | ||
3940 | orig_bytes, 1); | ||
3941 | if (ret) | ||
3942 | goto out; | ||
3943 | committed = true; | ||
3944 | goto again; | ||
3945 | } | ||
3946 | |||
3947 | spin_lock(&root->fs_info->free_chunk_lock); | ||
3948 | avail = root->fs_info->free_chunk_space; | ||
3949 | |||
3950 | /* | ||
3951 | * If we have dup, raid1 or raid10 then only half of the free | ||
3952 | * space is actually useable. | ||
3953 | */ | ||
3954 | if (profile & (BTRFS_BLOCK_GROUP_DUP | | ||
3955 | BTRFS_BLOCK_GROUP_RAID1 | | ||
3956 | BTRFS_BLOCK_GROUP_RAID10)) | ||
3957 | avail >>= 1; | ||
3958 | |||
3959 | /* | ||
3960 | * If we aren't flushing don't let us overcommit too much, say | ||
3961 | * 1/8th of the space. If we can flush, let it overcommit up to | ||
3962 | * 1/2 of the space. | ||
3963 | */ | ||
3964 | if (flush) | ||
3965 | avail >>= 3; | ||
3966 | else | ||
3967 | avail >>= 1; | ||
3968 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
3969 | |||
3970 | if (used + num_bytes < space_info->total_bytes + avail) { | ||
3971 | space_info->bytes_may_use += orig_bytes; | ||
3972 | trace_btrfs_space_reservation(root->fs_info, | ||
3973 | "space_info", space_info->flags, orig_bytes, 1); | ||
3974 | ret = 0; | ||
3975 | } | ||
3976 | } | 3978 | } |
3977 | 3979 | ||
3978 | /* | 3980 | /* |
@@ -4114,13 +4116,15 @@ static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, | |||
4114 | return 0; | 4116 | return 0; |
4115 | } | 4117 | } |
4116 | 4118 | ||
4117 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) | 4119 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type) |
4118 | { | 4120 | { |
4119 | memset(rsv, 0, sizeof(*rsv)); | 4121 | memset(rsv, 0, sizeof(*rsv)); |
4120 | spin_lock_init(&rsv->lock); | 4122 | spin_lock_init(&rsv->lock); |
4123 | rsv->type = type; | ||
4121 | } | 4124 | } |
4122 | 4125 | ||
4123 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | 4126 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root, |
4127 | unsigned short type) | ||
4124 | { | 4128 | { |
4125 | struct btrfs_block_rsv *block_rsv; | 4129 | struct btrfs_block_rsv *block_rsv; |
4126 | struct btrfs_fs_info *fs_info = root->fs_info; | 4130 | struct btrfs_fs_info *fs_info = root->fs_info; |
@@ -4129,7 +4133,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | |||
4129 | if (!block_rsv) | 4133 | if (!block_rsv) |
4130 | return NULL; | 4134 | return NULL; |
4131 | 4135 | ||
4132 | btrfs_init_block_rsv(block_rsv); | 4136 | btrfs_init_block_rsv(block_rsv, type); |
4133 | block_rsv->space_info = __find_space_info(fs_info, | 4137 | block_rsv->space_info = __find_space_info(fs_info, |
4134 | BTRFS_BLOCK_GROUP_METADATA); | 4138 | BTRFS_BLOCK_GROUP_METADATA); |
4135 | return block_rsv; | 4139 | return block_rsv; |
@@ -4138,6 +4142,8 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | |||
4138 | void btrfs_free_block_rsv(struct btrfs_root *root, | 4142 | void btrfs_free_block_rsv(struct btrfs_root *root, |
4139 | struct btrfs_block_rsv *rsv) | 4143 | struct btrfs_block_rsv *rsv) |
4140 | { | 4144 | { |
4145 | if (!rsv) | ||
4146 | return; | ||
4141 | btrfs_block_rsv_release(root, rsv, (u64)-1); | 4147 | btrfs_block_rsv_release(root, rsv, (u64)-1); |
4142 | kfree(rsv); | 4148 | kfree(rsv); |
4143 | } | 4149 | } |
@@ -4416,10 +4422,10 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
4416 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | 4422 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); |
4417 | struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; | 4423 | struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; |
4418 | /* | 4424 | /* |
4419 | * two for root back/forward refs, two for directory entries | 4425 | * two for root back/forward refs, two for directory entries, |
4420 | * and one for root of the snapshot. | 4426 | * one for root of the snapshot and one for parent inode. |
4421 | */ | 4427 | */ |
4422 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); | 4428 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 6); |
4423 | dst_rsv->space_info = src_rsv->space_info; | 4429 | dst_rsv->space_info = src_rsv->space_info; |
4424 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 4430 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
4425 | } | 4431 | } |
@@ -5018,7 +5024,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
5018 | 5024 | ||
5019 | while (1) { | 5025 | while (1) { |
5020 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 5026 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
5021 | EXTENT_DIRTY); | 5027 | EXTENT_DIRTY, NULL); |
5022 | if (ret) | 5028 | if (ret) |
5023 | break; | 5029 | break; |
5024 | 5030 | ||
@@ -5096,8 +5102,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5096 | ret = remove_extent_backref(trans, extent_root, path, | 5102 | ret = remove_extent_backref(trans, extent_root, path, |
5097 | NULL, refs_to_drop, | 5103 | NULL, refs_to_drop, |
5098 | is_data); | 5104 | is_data); |
5099 | if (ret) | 5105 | if (ret) { |
5100 | goto abort; | 5106 | btrfs_abort_transaction(trans, extent_root, ret); |
5107 | goto out; | ||
5108 | } | ||
5101 | btrfs_release_path(path); | 5109 | btrfs_release_path(path); |
5102 | path->leave_spinning = 1; | 5110 | path->leave_spinning = 1; |
5103 | 5111 | ||
@@ -5115,8 +5123,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5115 | btrfs_print_leaf(extent_root, | 5123 | btrfs_print_leaf(extent_root, |
5116 | path->nodes[0]); | 5124 | path->nodes[0]); |
5117 | } | 5125 | } |
5118 | if (ret < 0) | 5126 | if (ret < 0) { |
5119 | goto abort; | 5127 | btrfs_abort_transaction(trans, extent_root, ret); |
5128 | goto out; | ||
5129 | } | ||
5120 | extent_slot = path->slots[0]; | 5130 | extent_slot = path->slots[0]; |
5121 | } | 5131 | } |
5122 | } else if (ret == -ENOENT) { | 5132 | } else if (ret == -ENOENT) { |
@@ -5130,7 +5140,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5130 | (unsigned long long)owner_objectid, | 5140 | (unsigned long long)owner_objectid, |
5131 | (unsigned long long)owner_offset); | 5141 | (unsigned long long)owner_offset); |
5132 | } else { | 5142 | } else { |
5133 | goto abort; | 5143 | btrfs_abort_transaction(trans, extent_root, ret); |
5144 | goto out; | ||
5134 | } | 5145 | } |
5135 | 5146 | ||
5136 | leaf = path->nodes[0]; | 5147 | leaf = path->nodes[0]; |
@@ -5140,8 +5151,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5140 | BUG_ON(found_extent || extent_slot != path->slots[0]); | 5151 | BUG_ON(found_extent || extent_slot != path->slots[0]); |
5141 | ret = convert_extent_item_v0(trans, extent_root, path, | 5152 | ret = convert_extent_item_v0(trans, extent_root, path, |
5142 | owner_objectid, 0); | 5153 | owner_objectid, 0); |
5143 | if (ret < 0) | 5154 | if (ret < 0) { |
5144 | goto abort; | 5155 | btrfs_abort_transaction(trans, extent_root, ret); |
5156 | goto out; | ||
5157 | } | ||
5145 | 5158 | ||
5146 | btrfs_release_path(path); | 5159 | btrfs_release_path(path); |
5147 | path->leave_spinning = 1; | 5160 | path->leave_spinning = 1; |
@@ -5158,8 +5171,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5158 | (unsigned long long)bytenr); | 5171 | (unsigned long long)bytenr); |
5159 | btrfs_print_leaf(extent_root, path->nodes[0]); | 5172 | btrfs_print_leaf(extent_root, path->nodes[0]); |
5160 | } | 5173 | } |
5161 | if (ret < 0) | 5174 | if (ret < 0) { |
5162 | goto abort; | 5175 | btrfs_abort_transaction(trans, extent_root, ret); |
5176 | goto out; | ||
5177 | } | ||
5178 | |||
5163 | extent_slot = path->slots[0]; | 5179 | extent_slot = path->slots[0]; |
5164 | leaf = path->nodes[0]; | 5180 | leaf = path->nodes[0]; |
5165 | item_size = btrfs_item_size_nr(leaf, extent_slot); | 5181 | item_size = btrfs_item_size_nr(leaf, extent_slot); |
@@ -5196,8 +5212,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5196 | ret = remove_extent_backref(trans, extent_root, path, | 5212 | ret = remove_extent_backref(trans, extent_root, path, |
5197 | iref, refs_to_drop, | 5213 | iref, refs_to_drop, |
5198 | is_data); | 5214 | is_data); |
5199 | if (ret) | 5215 | if (ret) { |
5200 | goto abort; | 5216 | btrfs_abort_transaction(trans, extent_root, ret); |
5217 | goto out; | ||
5218 | } | ||
5201 | } | 5219 | } |
5202 | } else { | 5220 | } else { |
5203 | if (found_extent) { | 5221 | if (found_extent) { |
@@ -5214,27 +5232,29 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5214 | 5232 | ||
5215 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 5233 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
5216 | num_to_del); | 5234 | num_to_del); |
5217 | if (ret) | 5235 | if (ret) { |
5218 | goto abort; | 5236 | btrfs_abort_transaction(trans, extent_root, ret); |
5237 | goto out; | ||
5238 | } | ||
5219 | btrfs_release_path(path); | 5239 | btrfs_release_path(path); |
5220 | 5240 | ||
5221 | if (is_data) { | 5241 | if (is_data) { |
5222 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 5242 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
5223 | if (ret) | 5243 | if (ret) { |
5224 | goto abort; | 5244 | btrfs_abort_transaction(trans, extent_root, ret); |
5245 | goto out; | ||
5246 | } | ||
5225 | } | 5247 | } |
5226 | 5248 | ||
5227 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); | 5249 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); |
5228 | if (ret) | 5250 | if (ret) { |
5229 | goto abort; | 5251 | btrfs_abort_transaction(trans, extent_root, ret); |
5252 | goto out; | ||
5253 | } | ||
5230 | } | 5254 | } |
5231 | out: | 5255 | out: |
5232 | btrfs_free_path(path); | 5256 | btrfs_free_path(path); |
5233 | return ret; | 5257 | return ret; |
5234 | |||
5235 | abort: | ||
5236 | btrfs_abort_transaction(trans, extent_root, ret); | ||
5237 | goto out; | ||
5238 | } | 5258 | } |
5239 | 5259 | ||
5240 | /* | 5260 | /* |
@@ -5497,8 +5517,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
5497 | struct btrfs_block_group_cache *used_block_group; | 5517 | struct btrfs_block_group_cache *used_block_group; |
5498 | u64 search_start = 0; | 5518 | u64 search_start = 0; |
5499 | int empty_cluster = 2 * 1024 * 1024; | 5519 | int empty_cluster = 2 * 1024 * 1024; |
5500 | int allowed_chunk_alloc = 0; | ||
5501 | int done_chunk_alloc = 0; | ||
5502 | struct btrfs_space_info *space_info; | 5520 | struct btrfs_space_info *space_info; |
5503 | int loop = 0; | 5521 | int loop = 0; |
5504 | int index = 0; | 5522 | int index = 0; |
@@ -5530,9 +5548,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
5530 | if (btrfs_mixed_space_info(space_info)) | 5548 | if (btrfs_mixed_space_info(space_info)) |
5531 | use_cluster = false; | 5549 | use_cluster = false; |
5532 | 5550 | ||
5533 | if (orig_root->ref_cows || empty_size) | ||
5534 | allowed_chunk_alloc = 1; | ||
5535 | |||
5536 | if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { | 5551 | if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { |
5537 | last_ptr = &root->fs_info->meta_alloc_cluster; | 5552 | last_ptr = &root->fs_info->meta_alloc_cluster; |
5538 | if (!btrfs_test_opt(root, SSD)) | 5553 | if (!btrfs_test_opt(root, SSD)) |
@@ -5806,10 +5821,6 @@ checks: | |||
5806 | 5821 | ||
5807 | trace_btrfs_reserve_extent(orig_root, block_group, | 5822 | trace_btrfs_reserve_extent(orig_root, block_group, |
5808 | search_start, num_bytes); | 5823 | search_start, num_bytes); |
5809 | if (offset < search_start) | ||
5810 | btrfs_add_free_space(used_block_group, offset, | ||
5811 | search_start - offset); | ||
5812 | BUG_ON(offset > search_start); | ||
5813 | if (used_block_group != block_group) | 5824 | if (used_block_group != block_group) |
5814 | btrfs_put_block_group(used_block_group); | 5825 | btrfs_put_block_group(used_block_group); |
5815 | btrfs_put_block_group(block_group); | 5826 | btrfs_put_block_group(block_group); |
@@ -5842,34 +5853,17 @@ loop: | |||
5842 | index = 0; | 5853 | index = 0; |
5843 | loop++; | 5854 | loop++; |
5844 | if (loop == LOOP_ALLOC_CHUNK) { | 5855 | if (loop == LOOP_ALLOC_CHUNK) { |
5845 | if (allowed_chunk_alloc) { | 5856 | ret = do_chunk_alloc(trans, root, data, |
5846 | ret = do_chunk_alloc(trans, root, num_bytes + | 5857 | CHUNK_ALLOC_FORCE); |
5847 | 2 * 1024 * 1024, data, | 5858 | /* |
5848 | CHUNK_ALLOC_LIMITED); | 5859 | * Do not bail out on ENOSPC since we |
5849 | /* | 5860 | * can do more things. |
5850 | * Do not bail out on ENOSPC since we | 5861 | */ |
5851 | * can do more things. | 5862 | if (ret < 0 && ret != -ENOSPC) { |
5852 | */ | 5863 | btrfs_abort_transaction(trans, |
5853 | if (ret < 0 && ret != -ENOSPC) { | 5864 | root, ret); |
5854 | btrfs_abort_transaction(trans, | 5865 | goto out; |
5855 | root, ret); | ||
5856 | goto out; | ||
5857 | } | ||
5858 | allowed_chunk_alloc = 0; | ||
5859 | if (ret == 1) | ||
5860 | done_chunk_alloc = 1; | ||
5861 | } else if (!done_chunk_alloc && | ||
5862 | space_info->force_alloc == | ||
5863 | CHUNK_ALLOC_NO_FORCE) { | ||
5864 | space_info->force_alloc = CHUNK_ALLOC_LIMITED; | ||
5865 | } | 5866 | } |
5866 | |||
5867 | /* | ||
5868 | * We didn't allocate a chunk, go ahead and drop the | ||
5869 | * empty size and loop again. | ||
5870 | */ | ||
5871 | if (!done_chunk_alloc) | ||
5872 | loop = LOOP_NO_EMPTY_SIZE; | ||
5873 | } | 5867 | } |
5874 | 5868 | ||
5875 | if (loop == LOOP_NO_EMPTY_SIZE) { | 5869 | if (loop == LOOP_NO_EMPTY_SIZE) { |
@@ -5944,20 +5938,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
5944 | 5938 | ||
5945 | data = btrfs_get_alloc_profile(root, data); | 5939 | data = btrfs_get_alloc_profile(root, data); |
5946 | again: | 5940 | again: |
5947 | /* | ||
5948 | * the only place that sets empty_size is btrfs_realloc_node, which | ||
5949 | * is not called recursively on allocations | ||
5950 | */ | ||
5951 | if (empty_size || root->ref_cows) { | ||
5952 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
5953 | num_bytes + 2 * 1024 * 1024, data, | ||
5954 | CHUNK_ALLOC_NO_FORCE); | ||
5955 | if (ret < 0 && ret != -ENOSPC) { | ||
5956 | btrfs_abort_transaction(trans, root, ret); | ||
5957 | return ret; | ||
5958 | } | ||
5959 | } | ||
5960 | |||
5961 | WARN_ON(num_bytes < root->sectorsize); | 5941 | WARN_ON(num_bytes < root->sectorsize); |
5962 | ret = find_free_extent(trans, root, num_bytes, empty_size, | 5942 | ret = find_free_extent(trans, root, num_bytes, empty_size, |
5963 | hint_byte, ins, data); | 5943 | hint_byte, ins, data); |
@@ -5967,12 +5947,6 @@ again: | |||
5967 | num_bytes = num_bytes >> 1; | 5947 | num_bytes = num_bytes >> 1; |
5968 | num_bytes = num_bytes & ~(root->sectorsize - 1); | 5948 | num_bytes = num_bytes & ~(root->sectorsize - 1); |
5969 | num_bytes = max(num_bytes, min_alloc_size); | 5949 | num_bytes = max(num_bytes, min_alloc_size); |
5970 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
5971 | num_bytes, data, CHUNK_ALLOC_FORCE); | ||
5972 | if (ret < 0 && ret != -ENOSPC) { | ||
5973 | btrfs_abort_transaction(trans, root, ret); | ||
5974 | return ret; | ||
5975 | } | ||
5976 | if (num_bytes == min_alloc_size) | 5950 | if (num_bytes == min_alloc_size) |
5977 | final_tried = true; | 5951 | final_tried = true; |
5978 | goto again; | 5952 | goto again; |
@@ -6314,7 +6288,7 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
6314 | ret = block_rsv_use_bytes(block_rsv, blocksize); | 6288 | ret = block_rsv_use_bytes(block_rsv, blocksize); |
6315 | if (!ret) | 6289 | if (!ret) |
6316 | return block_rsv; | 6290 | return block_rsv; |
6317 | if (ret) { | 6291 | if (ret && !block_rsv->failfast) { |
6318 | static DEFINE_RATELIMIT_STATE(_rs, | 6292 | static DEFINE_RATELIMIT_STATE(_rs, |
6319 | DEFAULT_RATELIMIT_INTERVAL, | 6293 | DEFAULT_RATELIMIT_INTERVAL, |
6320 | /*DEFAULT_RATELIMIT_BURST*/ 2); | 6294 | /*DEFAULT_RATELIMIT_BURST*/ 2); |
@@ -7279,7 +7253,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
7279 | 7253 | ||
7280 | alloc_flags = update_block_group_flags(root, cache->flags); | 7254 | alloc_flags = update_block_group_flags(root, cache->flags); |
7281 | if (alloc_flags != cache->flags) { | 7255 | if (alloc_flags != cache->flags) { |
7282 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | 7256 | ret = do_chunk_alloc(trans, root, alloc_flags, |
7283 | CHUNK_ALLOC_FORCE); | 7257 | CHUNK_ALLOC_FORCE); |
7284 | if (ret < 0) | 7258 | if (ret < 0) |
7285 | goto out; | 7259 | goto out; |
@@ -7289,7 +7263,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
7289 | if (!ret) | 7263 | if (!ret) |
7290 | goto out; | 7264 | goto out; |
7291 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | 7265 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); |
7292 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | 7266 | ret = do_chunk_alloc(trans, root, alloc_flags, |
7293 | CHUNK_ALLOC_FORCE); | 7267 | CHUNK_ALLOC_FORCE); |
7294 | if (ret < 0) | 7268 | if (ret < 0) |
7295 | goto out; | 7269 | goto out; |
@@ -7303,7 +7277,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, | |||
7303 | struct btrfs_root *root, u64 type) | 7277 | struct btrfs_root *root, u64 type) |
7304 | { | 7278 | { |
7305 | u64 alloc_flags = get_alloc_profile(root, type); | 7279 | u64 alloc_flags = get_alloc_profile(root, type); |
7306 | return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | 7280 | return do_chunk_alloc(trans, root, alloc_flags, |
7307 | CHUNK_ALLOC_FORCE); | 7281 | CHUNK_ALLOC_FORCE); |
7308 | } | 7282 | } |
7309 | 7283 | ||
@@ -7810,6 +7784,34 @@ error: | |||
7810 | return ret; | 7784 | return ret; |
7811 | } | 7785 | } |
7812 | 7786 | ||
7787 | void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | ||
7788 | struct btrfs_root *root) | ||
7789 | { | ||
7790 | struct btrfs_block_group_cache *block_group, *tmp; | ||
7791 | struct btrfs_root *extent_root = root->fs_info->extent_root; | ||
7792 | struct btrfs_block_group_item item; | ||
7793 | struct btrfs_key key; | ||
7794 | int ret = 0; | ||
7795 | |||
7796 | list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, | ||
7797 | new_bg_list) { | ||
7798 | list_del_init(&block_group->new_bg_list); | ||
7799 | |||
7800 | if (ret) | ||
7801 | continue; | ||
7802 | |||
7803 | spin_lock(&block_group->lock); | ||
7804 | memcpy(&item, &block_group->item, sizeof(item)); | ||
7805 | memcpy(&key, &block_group->key, sizeof(key)); | ||
7806 | spin_unlock(&block_group->lock); | ||
7807 | |||
7808 | ret = btrfs_insert_item(trans, extent_root, &key, &item, | ||
7809 | sizeof(item)); | ||
7810 | if (ret) | ||
7811 | btrfs_abort_transaction(trans, extent_root, ret); | ||
7812 | } | ||
7813 | } | ||
7814 | |||
7813 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, | 7815 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, |
7814 | struct btrfs_root *root, u64 bytes_used, | 7816 | struct btrfs_root *root, u64 bytes_used, |
7815 | u64 type, u64 chunk_objectid, u64 chunk_offset, | 7817 | u64 type, u64 chunk_objectid, u64 chunk_offset, |
@@ -7843,6 +7845,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7843 | spin_lock_init(&cache->lock); | 7845 | spin_lock_init(&cache->lock); |
7844 | INIT_LIST_HEAD(&cache->list); | 7846 | INIT_LIST_HEAD(&cache->list); |
7845 | INIT_LIST_HEAD(&cache->cluster_list); | 7847 | INIT_LIST_HEAD(&cache->cluster_list); |
7848 | INIT_LIST_HEAD(&cache->new_bg_list); | ||
7846 | 7849 | ||
7847 | btrfs_init_free_space_ctl(cache); | 7850 | btrfs_init_free_space_ctl(cache); |
7848 | 7851 | ||
@@ -7874,12 +7877,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7874 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 7877 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
7875 | BUG_ON(ret); /* Logic error */ | 7878 | BUG_ON(ret); /* Logic error */ |
7876 | 7879 | ||
7877 | ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, | 7880 | list_add_tail(&cache->new_bg_list, &trans->new_bgs); |
7878 | sizeof(cache->item)); | ||
7879 | if (ret) { | ||
7880 | btrfs_abort_transaction(trans, extent_root, ret); | ||
7881 | return ret; | ||
7882 | } | ||
7883 | 7881 | ||
7884 | set_avail_alloc_bits(extent_root->fs_info, type); | 7882 | set_avail_alloc_bits(extent_root->fs_info, type); |
7885 | 7883 | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b08ea4717e9d..8036d3a84853 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -45,6 +45,7 @@ struct extent_page_data { | |||
45 | struct bio *bio; | 45 | struct bio *bio; |
46 | struct extent_io_tree *tree; | 46 | struct extent_io_tree *tree; |
47 | get_extent_t *get_extent; | 47 | get_extent_t *get_extent; |
48 | unsigned long bio_flags; | ||
48 | 49 | ||
49 | /* tells writepage not to lock the state bits for this range | 50 | /* tells writepage not to lock the state bits for this range |
50 | * it still does the unlocking | 51 | * it still does the unlocking |
@@ -64,13 +65,13 @@ tree_fs_info(struct extent_io_tree *tree) | |||
64 | 65 | ||
65 | int __init extent_io_init(void) | 66 | int __init extent_io_init(void) |
66 | { | 67 | { |
67 | extent_state_cache = kmem_cache_create("extent_state", | 68 | extent_state_cache = kmem_cache_create("btrfs_extent_state", |
68 | sizeof(struct extent_state), 0, | 69 | sizeof(struct extent_state), 0, |
69 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 70 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
70 | if (!extent_state_cache) | 71 | if (!extent_state_cache) |
71 | return -ENOMEM; | 72 | return -ENOMEM; |
72 | 73 | ||
73 | extent_buffer_cache = kmem_cache_create("extent_buffers", | 74 | extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer", |
74 | sizeof(struct extent_buffer), 0, | 75 | sizeof(struct extent_buffer), 0, |
75 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 76 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
76 | if (!extent_buffer_cache) | 77 | if (!extent_buffer_cache) |
@@ -942,6 +943,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, | |||
942 | * @end: the end offset in bytes (inclusive) | 943 | * @end: the end offset in bytes (inclusive) |
943 | * @bits: the bits to set in this range | 944 | * @bits: the bits to set in this range |
944 | * @clear_bits: the bits to clear in this range | 945 | * @clear_bits: the bits to clear in this range |
946 | * @cached_state: state that we're going to cache | ||
945 | * @mask: the allocation mask | 947 | * @mask: the allocation mask |
946 | * | 948 | * |
947 | * This will go through and set bits for the given range. If any states exist | 949 | * This will go through and set bits for the given range. If any states exist |
@@ -951,7 +953,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, | |||
951 | * boundary bits like LOCK. | 953 | * boundary bits like LOCK. |
952 | */ | 954 | */ |
953 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 955 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
954 | int bits, int clear_bits, gfp_t mask) | 956 | int bits, int clear_bits, |
957 | struct extent_state **cached_state, gfp_t mask) | ||
955 | { | 958 | { |
956 | struct extent_state *state; | 959 | struct extent_state *state; |
957 | struct extent_state *prealloc = NULL; | 960 | struct extent_state *prealloc = NULL; |
@@ -968,6 +971,15 @@ again: | |||
968 | } | 971 | } |
969 | 972 | ||
970 | spin_lock(&tree->lock); | 973 | spin_lock(&tree->lock); |
974 | if (cached_state && *cached_state) { | ||
975 | state = *cached_state; | ||
976 | if (state->start <= start && state->end > start && | ||
977 | state->tree) { | ||
978 | node = &state->rb_node; | ||
979 | goto hit_next; | ||
980 | } | ||
981 | } | ||
982 | |||
971 | /* | 983 | /* |
972 | * this search will find all the extents that end after | 984 | * this search will find all the extents that end after |
973 | * our range starts. | 985 | * our range starts. |
@@ -998,6 +1010,7 @@ hit_next: | |||
998 | */ | 1010 | */ |
999 | if (state->start == start && state->end <= end) { | 1011 | if (state->start == start && state->end <= end) { |
1000 | set_state_bits(tree, state, &bits); | 1012 | set_state_bits(tree, state, &bits); |
1013 | cache_state(state, cached_state); | ||
1001 | state = clear_state_bit(tree, state, &clear_bits, 0); | 1014 | state = clear_state_bit(tree, state, &clear_bits, 0); |
1002 | if (last_end == (u64)-1) | 1015 | if (last_end == (u64)-1) |
1003 | goto out; | 1016 | goto out; |
@@ -1038,6 +1051,7 @@ hit_next: | |||
1038 | goto out; | 1051 | goto out; |
1039 | if (state->end <= end) { | 1052 | if (state->end <= end) { |
1040 | set_state_bits(tree, state, &bits); | 1053 | set_state_bits(tree, state, &bits); |
1054 | cache_state(state, cached_state); | ||
1041 | state = clear_state_bit(tree, state, &clear_bits, 0); | 1055 | state = clear_state_bit(tree, state, &clear_bits, 0); |
1042 | if (last_end == (u64)-1) | 1056 | if (last_end == (u64)-1) |
1043 | goto out; | 1057 | goto out; |
@@ -1076,6 +1090,7 @@ hit_next: | |||
1076 | &bits); | 1090 | &bits); |
1077 | if (err) | 1091 | if (err) |
1078 | extent_io_tree_panic(tree, err); | 1092 | extent_io_tree_panic(tree, err); |
1093 | cache_state(prealloc, cached_state); | ||
1079 | prealloc = NULL; | 1094 | prealloc = NULL; |
1080 | start = this_end + 1; | 1095 | start = this_end + 1; |
1081 | goto search_again; | 1096 | goto search_again; |
@@ -1098,6 +1113,7 @@ hit_next: | |||
1098 | extent_io_tree_panic(tree, err); | 1113 | extent_io_tree_panic(tree, err); |
1099 | 1114 | ||
1100 | set_state_bits(tree, prealloc, &bits); | 1115 | set_state_bits(tree, prealloc, &bits); |
1116 | cache_state(prealloc, cached_state); | ||
1101 | clear_state_bit(tree, prealloc, &clear_bits, 0); | 1117 | clear_state_bit(tree, prealloc, &clear_bits, 0); |
1102 | prealloc = NULL; | 1118 | prealloc = NULL; |
1103 | goto out; | 1119 | goto out; |
@@ -1150,6 +1166,14 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | |||
1150 | NULL, cached_state, mask); | 1166 | NULL, cached_state, mask); |
1151 | } | 1167 | } |
1152 | 1168 | ||
1169 | int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, | ||
1170 | struct extent_state **cached_state, gfp_t mask) | ||
1171 | { | ||
1172 | return set_extent_bit(tree, start, end, | ||
1173 | EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG, | ||
1174 | NULL, cached_state, mask); | ||
1175 | } | ||
1176 | |||
1153 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 1177 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
1154 | gfp_t mask) | 1178 | gfp_t mask) |
1155 | { | 1179 | { |
@@ -1294,18 +1318,42 @@ out: | |||
1294 | * If nothing was found, 1 is returned. If found something, return 0. | 1318 | * If nothing was found, 1 is returned. If found something, return 0. |
1295 | */ | 1319 | */ |
1296 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 1320 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
1297 | u64 *start_ret, u64 *end_ret, int bits) | 1321 | u64 *start_ret, u64 *end_ret, int bits, |
1322 | struct extent_state **cached_state) | ||
1298 | { | 1323 | { |
1299 | struct extent_state *state; | 1324 | struct extent_state *state; |
1325 | struct rb_node *n; | ||
1300 | int ret = 1; | 1326 | int ret = 1; |
1301 | 1327 | ||
1302 | spin_lock(&tree->lock); | 1328 | spin_lock(&tree->lock); |
1329 | if (cached_state && *cached_state) { | ||
1330 | state = *cached_state; | ||
1331 | if (state->end == start - 1 && state->tree) { | ||
1332 | n = rb_next(&state->rb_node); | ||
1333 | while (n) { | ||
1334 | state = rb_entry(n, struct extent_state, | ||
1335 | rb_node); | ||
1336 | if (state->state & bits) | ||
1337 | goto got_it; | ||
1338 | n = rb_next(n); | ||
1339 | } | ||
1340 | free_extent_state(*cached_state); | ||
1341 | *cached_state = NULL; | ||
1342 | goto out; | ||
1343 | } | ||
1344 | free_extent_state(*cached_state); | ||
1345 | *cached_state = NULL; | ||
1346 | } | ||
1347 | |||
1303 | state = find_first_extent_bit_state(tree, start, bits); | 1348 | state = find_first_extent_bit_state(tree, start, bits); |
1349 | got_it: | ||
1304 | if (state) { | 1350 | if (state) { |
1351 | cache_state(state, cached_state); | ||
1305 | *start_ret = state->start; | 1352 | *start_ret = state->start; |
1306 | *end_ret = state->end; | 1353 | *end_ret = state->end; |
1307 | ret = 0; | 1354 | ret = 0; |
1308 | } | 1355 | } |
1356 | out: | ||
1309 | spin_unlock(&tree->lock); | 1357 | spin_unlock(&tree->lock); |
1310 | return ret; | 1358 | return ret; |
1311 | } | 1359 | } |
@@ -2068,7 +2116,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, | |||
2068 | } | 2116 | } |
2069 | read_unlock(&em_tree->lock); | 2117 | read_unlock(&em_tree->lock); |
2070 | 2118 | ||
2071 | if (!em || IS_ERR(em)) { | 2119 | if (!em) { |
2072 | kfree(failrec); | 2120 | kfree(failrec); |
2073 | return -EIO; | 2121 | return -EIO; |
2074 | } | 2122 | } |
@@ -2304,8 +2352,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2304 | struct extent_state *cached = NULL; | 2352 | struct extent_state *cached = NULL; |
2305 | struct extent_state *state; | 2353 | struct extent_state *state; |
2306 | 2354 | ||
2307 | pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, " | 2355 | pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " |
2308 | "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err, | 2356 | "mirror=%ld\n", (u64)bio->bi_sector, err, |
2309 | (long int)bio->bi_bdev); | 2357 | (long int)bio->bi_bdev); |
2310 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 2358 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
2311 | 2359 | ||
@@ -2709,12 +2757,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2709 | end_bio_extent_readpage, mirror_num, | 2757 | end_bio_extent_readpage, mirror_num, |
2710 | *bio_flags, | 2758 | *bio_flags, |
2711 | this_bio_flag); | 2759 | this_bio_flag); |
2712 | BUG_ON(ret == -ENOMEM); | 2760 | if (!ret) { |
2713 | nr++; | 2761 | nr++; |
2714 | *bio_flags = this_bio_flag; | 2762 | *bio_flags = this_bio_flag; |
2763 | } | ||
2715 | } | 2764 | } |
2716 | if (ret) | 2765 | if (ret) { |
2717 | SetPageError(page); | 2766 | SetPageError(page); |
2767 | unlock_extent(tree, cur, cur + iosize - 1); | ||
2768 | } | ||
2718 | cur = cur + iosize; | 2769 | cur = cur + iosize; |
2719 | pg_offset += iosize; | 2770 | pg_offset += iosize; |
2720 | } | 2771 | } |
@@ -3161,12 +3212,16 @@ static int write_one_eb(struct extent_buffer *eb, | |||
3161 | struct block_device *bdev = fs_info->fs_devices->latest_bdev; | 3212 | struct block_device *bdev = fs_info->fs_devices->latest_bdev; |
3162 | u64 offset = eb->start; | 3213 | u64 offset = eb->start; |
3163 | unsigned long i, num_pages; | 3214 | unsigned long i, num_pages; |
3215 | unsigned long bio_flags = 0; | ||
3164 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE); | 3216 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE); |
3165 | int ret = 0; | 3217 | int ret = 0; |
3166 | 3218 | ||
3167 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 3219 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); |
3168 | num_pages = num_extent_pages(eb->start, eb->len); | 3220 | num_pages = num_extent_pages(eb->start, eb->len); |
3169 | atomic_set(&eb->io_pages, num_pages); | 3221 | atomic_set(&eb->io_pages, num_pages); |
3222 | if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID) | ||
3223 | bio_flags = EXTENT_BIO_TREE_LOG; | ||
3224 | |||
3170 | for (i = 0; i < num_pages; i++) { | 3225 | for (i = 0; i < num_pages; i++) { |
3171 | struct page *p = extent_buffer_page(eb, i); | 3226 | struct page *p = extent_buffer_page(eb, i); |
3172 | 3227 | ||
@@ -3175,7 +3230,8 @@ static int write_one_eb(struct extent_buffer *eb, | |||
3175 | ret = submit_extent_page(rw, eb->tree, p, offset >> 9, | 3230 | ret = submit_extent_page(rw, eb->tree, p, offset >> 9, |
3176 | PAGE_CACHE_SIZE, 0, bdev, &epd->bio, | 3231 | PAGE_CACHE_SIZE, 0, bdev, &epd->bio, |
3177 | -1, end_bio_extent_buffer_writepage, | 3232 | -1, end_bio_extent_buffer_writepage, |
3178 | 0, 0, 0); | 3233 | 0, epd->bio_flags, bio_flags); |
3234 | epd->bio_flags = bio_flags; | ||
3179 | if (ret) { | 3235 | if (ret) { |
3180 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 3236 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); |
3181 | SetPageError(p); | 3237 | SetPageError(p); |
@@ -3210,6 +3266,7 @@ int btree_write_cache_pages(struct address_space *mapping, | |||
3210 | .tree = tree, | 3266 | .tree = tree, |
3211 | .extent_locked = 0, | 3267 | .extent_locked = 0, |
3212 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 3268 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
3269 | .bio_flags = 0, | ||
3213 | }; | 3270 | }; |
3214 | int ret = 0; | 3271 | int ret = 0; |
3215 | int done = 0; | 3272 | int done = 0; |
@@ -3254,19 +3311,34 @@ retry: | |||
3254 | break; | 3311 | break; |
3255 | } | 3312 | } |
3256 | 3313 | ||
3314 | spin_lock(&mapping->private_lock); | ||
3315 | if (!PagePrivate(page)) { | ||
3316 | spin_unlock(&mapping->private_lock); | ||
3317 | continue; | ||
3318 | } | ||
3319 | |||
3257 | eb = (struct extent_buffer *)page->private; | 3320 | eb = (struct extent_buffer *)page->private; |
3321 | |||
3322 | /* | ||
3323 | * Shouldn't happen and normally this would be a BUG_ON | ||
3324 | * but no sense in crashing the users box for something | ||
3325 | * we can survive anyway. | ||
3326 | */ | ||
3258 | if (!eb) { | 3327 | if (!eb) { |
3328 | spin_unlock(&mapping->private_lock); | ||
3259 | WARN_ON(1); | 3329 | WARN_ON(1); |
3260 | continue; | 3330 | continue; |
3261 | } | 3331 | } |
3262 | 3332 | ||
3263 | if (eb == prev_eb) | 3333 | if (eb == prev_eb) { |
3334 | spin_unlock(&mapping->private_lock); | ||
3264 | continue; | 3335 | continue; |
3336 | } | ||
3265 | 3337 | ||
3266 | if (!atomic_inc_not_zero(&eb->refs)) { | 3338 | ret = atomic_inc_not_zero(&eb->refs); |
3267 | WARN_ON(1); | 3339 | spin_unlock(&mapping->private_lock); |
3340 | if (!ret) | ||
3268 | continue; | 3341 | continue; |
3269 | } | ||
3270 | 3342 | ||
3271 | prev_eb = eb; | 3343 | prev_eb = eb; |
3272 | ret = lock_extent_buffer_for_io(eb, fs_info, &epd); | 3344 | ret = lock_extent_buffer_for_io(eb, fs_info, &epd); |
@@ -3457,7 +3529,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd) | |||
3457 | if (epd->sync_io) | 3529 | if (epd->sync_io) |
3458 | rw = WRITE_SYNC; | 3530 | rw = WRITE_SYNC; |
3459 | 3531 | ||
3460 | ret = submit_one_bio(rw, epd->bio, 0, 0); | 3532 | ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags); |
3461 | BUG_ON(ret < 0); /* -ENOMEM */ | 3533 | BUG_ON(ret < 0); /* -ENOMEM */ |
3462 | epd->bio = NULL; | 3534 | epd->bio = NULL; |
3463 | } | 3535 | } |
@@ -3480,6 +3552,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
3480 | .get_extent = get_extent, | 3552 | .get_extent = get_extent, |
3481 | .extent_locked = 0, | 3553 | .extent_locked = 0, |
3482 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 3554 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
3555 | .bio_flags = 0, | ||
3483 | }; | 3556 | }; |
3484 | 3557 | ||
3485 | ret = __extent_writepage(page, wbc, &epd); | 3558 | ret = __extent_writepage(page, wbc, &epd); |
@@ -3504,6 +3577,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
3504 | .get_extent = get_extent, | 3577 | .get_extent = get_extent, |
3505 | .extent_locked = 1, | 3578 | .extent_locked = 1, |
3506 | .sync_io = mode == WB_SYNC_ALL, | 3579 | .sync_io = mode == WB_SYNC_ALL, |
3580 | .bio_flags = 0, | ||
3507 | }; | 3581 | }; |
3508 | struct writeback_control wbc_writepages = { | 3582 | struct writeback_control wbc_writepages = { |
3509 | .sync_mode = mode, | 3583 | .sync_mode = mode, |
@@ -3543,6 +3617,7 @@ int extent_writepages(struct extent_io_tree *tree, | |||
3543 | .get_extent = get_extent, | 3617 | .get_extent = get_extent, |
3544 | .extent_locked = 0, | 3618 | .extent_locked = 0, |
3545 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 3619 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
3620 | .bio_flags = 0, | ||
3546 | }; | 3621 | }; |
3547 | 3622 | ||
3548 | ret = extent_write_cache_pages(tree, mapping, wbc, | 3623 | ret = extent_write_cache_pages(tree, mapping, wbc, |
@@ -3920,18 +3995,6 @@ out: | |||
3920 | return ret; | 3995 | return ret; |
3921 | } | 3996 | } |
3922 | 3997 | ||
3923 | inline struct page *extent_buffer_page(struct extent_buffer *eb, | ||
3924 | unsigned long i) | ||
3925 | { | ||
3926 | return eb->pages[i]; | ||
3927 | } | ||
3928 | |||
3929 | inline unsigned long num_extent_pages(u64 start, u64 len) | ||
3930 | { | ||
3931 | return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - | ||
3932 | (start >> PAGE_CACHE_SHIFT); | ||
3933 | } | ||
3934 | |||
3935 | static void __free_extent_buffer(struct extent_buffer *eb) | 3998 | static void __free_extent_buffer(struct extent_buffer *eb) |
3936 | { | 3999 | { |
3937 | #if LEAK_DEBUG | 4000 | #if LEAK_DEBUG |
@@ -4047,7 +4110,7 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) | |||
4047 | 4110 | ||
4048 | return eb; | 4111 | return eb; |
4049 | err: | 4112 | err: |
4050 | for (i--; i > 0; i--) | 4113 | for (i--; i >= 0; i--) |
4051 | __free_page(eb->pages[i]); | 4114 | __free_page(eb->pages[i]); |
4052 | __free_extent_buffer(eb); | 4115 | __free_extent_buffer(eb); |
4053 | return NULL; | 4116 | return NULL; |
@@ -4192,10 +4255,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
4192 | 4255 | ||
4193 | for (i = 0; i < num_pages; i++, index++) { | 4256 | for (i = 0; i < num_pages; i++, index++) { |
4194 | p = find_or_create_page(mapping, index, GFP_NOFS); | 4257 | p = find_or_create_page(mapping, index, GFP_NOFS); |
4195 | if (!p) { | 4258 | if (!p) |
4196 | WARN_ON(1); | ||
4197 | goto free_eb; | 4259 | goto free_eb; |
4198 | } | ||
4199 | 4260 | ||
4200 | spin_lock(&mapping->private_lock); | 4261 | spin_lock(&mapping->private_lock); |
4201 | if (PagePrivate(p)) { | 4262 | if (PagePrivate(p)) { |
@@ -4338,7 +4399,6 @@ static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask) | |||
4338 | 4399 | ||
4339 | /* Should be safe to release our pages at this point */ | 4400 | /* Should be safe to release our pages at this point */ |
4340 | btrfs_release_extent_buffer_page(eb, 0); | 4401 | btrfs_release_extent_buffer_page(eb, 0); |
4341 | |||
4342 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); | 4402 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); |
4343 | return 1; | 4403 | return 1; |
4344 | } | 4404 | } |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 25900af5b15d..711d12b80028 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -27,6 +27,7 @@ | |||
27 | * type for this bio | 27 | * type for this bio |
28 | */ | 28 | */ |
29 | #define EXTENT_BIO_COMPRESSED 1 | 29 | #define EXTENT_BIO_COMPRESSED 1 |
30 | #define EXTENT_BIO_TREE_LOG 2 | ||
30 | #define EXTENT_BIO_FLAG_SHIFT 16 | 31 | #define EXTENT_BIO_FLAG_SHIFT 16 |
31 | 32 | ||
32 | /* these are bit numbers for test/set bit */ | 33 | /* these are bit numbers for test/set bit */ |
@@ -232,11 +233,15 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
232 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 233 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
233 | gfp_t mask); | 234 | gfp_t mask); |
234 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 235 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
235 | int bits, int clear_bits, gfp_t mask); | 236 | int bits, int clear_bits, |
237 | struct extent_state **cached_state, gfp_t mask); | ||
236 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | 238 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
237 | struct extent_state **cached_state, gfp_t mask); | 239 | struct extent_state **cached_state, gfp_t mask); |
240 | int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, | ||
241 | struct extent_state **cached_state, gfp_t mask); | ||
238 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 242 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
239 | u64 *start_ret, u64 *end_ret, int bits); | 243 | u64 *start_ret, u64 *end_ret, int bits, |
244 | struct extent_state **cached_state); | ||
240 | struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, | 245 | struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, |
241 | u64 start, int bits); | 246 | u64 start, int bits); |
242 | int extent_invalidatepage(struct extent_io_tree *tree, | 247 | int extent_invalidatepage(struct extent_io_tree *tree, |
@@ -277,8 +282,18 @@ void free_extent_buffer_stale(struct extent_buffer *eb); | |||
277 | int read_extent_buffer_pages(struct extent_io_tree *tree, | 282 | int read_extent_buffer_pages(struct extent_io_tree *tree, |
278 | struct extent_buffer *eb, u64 start, int wait, | 283 | struct extent_buffer *eb, u64 start, int wait, |
279 | get_extent_t *get_extent, int mirror_num); | 284 | get_extent_t *get_extent, int mirror_num); |
280 | unsigned long num_extent_pages(u64 start, u64 len); | 285 | |
281 | struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i); | 286 | static inline unsigned long num_extent_pages(u64 start, u64 len) |
287 | { | ||
288 | return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - | ||
289 | (start >> PAGE_CACHE_SHIFT); | ||
290 | } | ||
291 | |||
292 | static inline struct page *extent_buffer_page(struct extent_buffer *eb, | ||
293 | unsigned long i) | ||
294 | { | ||
295 | return eb->pages[i]; | ||
296 | } | ||
282 | 297 | ||
283 | static inline void extent_buffer_get(struct extent_buffer *eb) | 298 | static inline void extent_buffer_get(struct extent_buffer *eb) |
284 | { | 299 | { |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 7c97b3301459..b8cbc8d5c7f7 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -11,7 +11,7 @@ static struct kmem_cache *extent_map_cache; | |||
11 | 11 | ||
12 | int __init extent_map_init(void) | 12 | int __init extent_map_init(void) |
13 | { | 13 | { |
14 | extent_map_cache = kmem_cache_create("extent_map", | 14 | extent_map_cache = kmem_cache_create("btrfs_extent_map", |
15 | sizeof(struct extent_map), 0, | 15 | sizeof(struct extent_map), 0, |
16 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 16 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
17 | if (!extent_map_cache) | 17 | if (!extent_map_cache) |
@@ -35,6 +35,7 @@ void extent_map_exit(void) | |||
35 | void extent_map_tree_init(struct extent_map_tree *tree) | 35 | void extent_map_tree_init(struct extent_map_tree *tree) |
36 | { | 36 | { |
37 | tree->map = RB_ROOT; | 37 | tree->map = RB_ROOT; |
38 | INIT_LIST_HEAD(&tree->modified_extents); | ||
38 | rwlock_init(&tree->lock); | 39 | rwlock_init(&tree->lock); |
39 | } | 40 | } |
40 | 41 | ||
@@ -54,7 +55,9 @@ struct extent_map *alloc_extent_map(void) | |||
54 | em->in_tree = 0; | 55 | em->in_tree = 0; |
55 | em->flags = 0; | 56 | em->flags = 0; |
56 | em->compress_type = BTRFS_COMPRESS_NONE; | 57 | em->compress_type = BTRFS_COMPRESS_NONE; |
58 | em->generation = 0; | ||
57 | atomic_set(&em->refs, 1); | 59 | atomic_set(&em->refs, 1); |
60 | INIT_LIST_HEAD(&em->list); | ||
58 | return em; | 61 | return em; |
59 | } | 62 | } |
60 | 63 | ||
@@ -72,6 +75,7 @@ void free_extent_map(struct extent_map *em) | |||
72 | WARN_ON(atomic_read(&em->refs) == 0); | 75 | WARN_ON(atomic_read(&em->refs) == 0); |
73 | if (atomic_dec_and_test(&em->refs)) { | 76 | if (atomic_dec_and_test(&em->refs)) { |
74 | WARN_ON(em->in_tree); | 77 | WARN_ON(em->in_tree); |
78 | WARN_ON(!list_empty(&em->list)); | ||
75 | kmem_cache_free(extent_map_cache, em); | 79 | kmem_cache_free(extent_map_cache, em); |
76 | } | 80 | } |
77 | } | 81 | } |
@@ -198,6 +202,14 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
198 | em->block_len += merge->block_len; | 202 | em->block_len += merge->block_len; |
199 | em->block_start = merge->block_start; | 203 | em->block_start = merge->block_start; |
200 | merge->in_tree = 0; | 204 | merge->in_tree = 0; |
205 | if (merge->generation > em->generation) { | ||
206 | em->mod_start = em->start; | ||
207 | em->mod_len = em->len; | ||
208 | em->generation = merge->generation; | ||
209 | list_move(&em->list, &tree->modified_extents); | ||
210 | } | ||
211 | |||
212 | list_del_init(&merge->list); | ||
201 | rb_erase(&merge->rb_node, &tree->map); | 213 | rb_erase(&merge->rb_node, &tree->map); |
202 | free_extent_map(merge); | 214 | free_extent_map(merge); |
203 | } | 215 | } |
@@ -211,14 +223,34 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
211 | em->block_len += merge->len; | 223 | em->block_len += merge->len; |
212 | rb_erase(&merge->rb_node, &tree->map); | 224 | rb_erase(&merge->rb_node, &tree->map); |
213 | merge->in_tree = 0; | 225 | merge->in_tree = 0; |
226 | if (merge->generation > em->generation) { | ||
227 | em->mod_len = em->len; | ||
228 | em->generation = merge->generation; | ||
229 | list_move(&em->list, &tree->modified_extents); | ||
230 | } | ||
231 | list_del_init(&merge->list); | ||
214 | free_extent_map(merge); | 232 | free_extent_map(merge); |
215 | } | 233 | } |
216 | } | 234 | } |
217 | 235 | ||
218 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | 236 | /** |
237 | * unpin_extent_cache - unpin an extent from the cache | ||
238 | * @tree: tree to unpin the extent in | ||
239 | * @start: logical offset in the file | ||
240 | * @len: length of the extent | ||
241 | * @gen: generation that this extent has been modified in | ||
242 | * @prealloc: if this is set we need to clear the prealloc flag | ||
243 | * | ||
244 | * Called after an extent has been written to disk properly. Set the generation | ||
245 | * to the generation that actually added the file item to the inode so we know | ||
246 | * we need to sync this extent when we call fsync(). | ||
247 | */ | ||
248 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, | ||
249 | u64 gen) | ||
219 | { | 250 | { |
220 | int ret = 0; | 251 | int ret = 0; |
221 | struct extent_map *em; | 252 | struct extent_map *em; |
253 | bool prealloc = false; | ||
222 | 254 | ||
223 | write_lock(&tree->lock); | 255 | write_lock(&tree->lock); |
224 | em = lookup_extent_mapping(tree, start, len); | 256 | em = lookup_extent_mapping(tree, start, len); |
@@ -228,10 +260,24 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | |||
228 | if (!em) | 260 | if (!em) |
229 | goto out; | 261 | goto out; |
230 | 262 | ||
263 | list_move(&em->list, &tree->modified_extents); | ||
264 | em->generation = gen; | ||
231 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 265 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
266 | em->mod_start = em->start; | ||
267 | em->mod_len = em->len; | ||
268 | |||
269 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { | ||
270 | prealloc = true; | ||
271 | clear_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
272 | } | ||
232 | 273 | ||
233 | try_merge_map(tree, em); | 274 | try_merge_map(tree, em); |
234 | 275 | ||
276 | if (prealloc) { | ||
277 | em->mod_start = em->start; | ||
278 | em->mod_len = em->len; | ||
279 | } | ||
280 | |||
235 | free_extent_map(em); | 281 | free_extent_map(em); |
236 | out: | 282 | out: |
237 | write_unlock(&tree->lock); | 283 | write_unlock(&tree->lock); |
@@ -269,6 +315,9 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
269 | } | 315 | } |
270 | atomic_inc(&em->refs); | 316 | atomic_inc(&em->refs); |
271 | 317 | ||
318 | em->mod_start = em->start; | ||
319 | em->mod_len = em->len; | ||
320 | |||
272 | try_merge_map(tree, em); | 321 | try_merge_map(tree, em); |
273 | out: | 322 | out: |
274 | return ret; | 323 | return ret; |
@@ -358,6 +407,8 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
358 | 407 | ||
359 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); | 408 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); |
360 | rb_erase(&em->rb_node, &tree->map); | 409 | rb_erase(&em->rb_node, &tree->map); |
410 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) | ||
411 | list_del_init(&em->list); | ||
361 | em->in_tree = 0; | 412 | em->in_tree = 0; |
362 | return ret; | 413 | return ret; |
363 | } | 414 | } |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 1195f09761fe..679225555f7b 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
@@ -13,6 +13,7 @@ | |||
13 | #define EXTENT_FLAG_COMPRESSED 1 | 13 | #define EXTENT_FLAG_COMPRESSED 1 |
14 | #define EXTENT_FLAG_VACANCY 2 /* no file extent item found */ | 14 | #define EXTENT_FLAG_VACANCY 2 /* no file extent item found */ |
15 | #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ | 15 | #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ |
16 | #define EXTENT_FLAG_LOGGING 4 /* Logging this extent */ | ||
16 | 17 | ||
17 | struct extent_map { | 18 | struct extent_map { |
18 | struct rb_node rb_node; | 19 | struct rb_node rb_node; |
@@ -20,18 +21,23 @@ struct extent_map { | |||
20 | /* all of these are in bytes */ | 21 | /* all of these are in bytes */ |
21 | u64 start; | 22 | u64 start; |
22 | u64 len; | 23 | u64 len; |
24 | u64 mod_start; | ||
25 | u64 mod_len; | ||
23 | u64 orig_start; | 26 | u64 orig_start; |
24 | u64 block_start; | 27 | u64 block_start; |
25 | u64 block_len; | 28 | u64 block_len; |
29 | u64 generation; | ||
26 | unsigned long flags; | 30 | unsigned long flags; |
27 | struct block_device *bdev; | 31 | struct block_device *bdev; |
28 | atomic_t refs; | 32 | atomic_t refs; |
29 | unsigned int in_tree; | 33 | unsigned int in_tree; |
30 | unsigned int compress_type; | 34 | unsigned int compress_type; |
35 | struct list_head list; | ||
31 | }; | 36 | }; |
32 | 37 | ||
33 | struct extent_map_tree { | 38 | struct extent_map_tree { |
34 | struct rb_root map; | 39 | struct rb_root map; |
40 | struct list_head modified_extents; | ||
35 | rwlock_t lock; | 41 | rwlock_t lock; |
36 | }; | 42 | }; |
37 | 43 | ||
@@ -60,7 +66,7 @@ struct extent_map *alloc_extent_map(void); | |||
60 | void free_extent_map(struct extent_map *em); | 66 | void free_extent_map(struct extent_map *em); |
61 | int __init extent_map_init(void); | 67 | int __init extent_map_init(void); |
62 | void extent_map_exit(void); | 68 | void extent_map_exit(void); |
63 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len); | 69 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen); |
64 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | 70 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, |
65 | u64 start, u64 len); | 71 | u64 start, u64 len); |
66 | #endif | 72 | #endif |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 857d93cd01dc..1ad08e4e4a15 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -25,11 +25,12 @@ | |||
25 | #include "transaction.h" | 25 | #include "transaction.h" |
26 | #include "print-tree.h" | 26 | #include "print-tree.h" |
27 | 27 | ||
28 | #define __MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \ | 28 | #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ |
29 | sizeof(struct btrfs_item) * 2) / \ | 29 | sizeof(struct btrfs_item) * 2) / \ |
30 | size) - 1)) | 30 | size) - 1)) |
31 | 31 | ||
32 | #define MAX_CSUM_ITEMS(r, size) (min(__MAX_CSUM_ITEMS(r, size), PAGE_CACHE_SIZE)) | 32 | #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \ |
33 | PAGE_CACHE_SIZE)) | ||
33 | 34 | ||
34 | #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ | 35 | #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ |
35 | sizeof(struct btrfs_ordered_sum)) / \ | 36 | sizeof(struct btrfs_ordered_sum)) / \ |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f6b40e86121b..9ab1bed88116 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include "tree-log.h" | 39 | #include "tree-log.h" |
40 | #include "locking.h" | 40 | #include "locking.h" |
41 | #include "compat.h" | 41 | #include "compat.h" |
42 | #include "volumes.h" | ||
42 | 43 | ||
43 | /* | 44 | /* |
44 | * when auto defrag is enabled we | 45 | * when auto defrag is enabled we |
@@ -458,14 +459,15 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, | |||
458 | * this drops all the extents in the cache that intersect the range | 459 | * this drops all the extents in the cache that intersect the range |
459 | * [start, end]. Existing extents are split as required. | 460 | * [start, end]. Existing extents are split as required. |
460 | */ | 461 | */ |
461 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 462 | void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
462 | int skip_pinned) | 463 | int skip_pinned) |
463 | { | 464 | { |
464 | struct extent_map *em; | 465 | struct extent_map *em; |
465 | struct extent_map *split = NULL; | 466 | struct extent_map *split = NULL; |
466 | struct extent_map *split2 = NULL; | 467 | struct extent_map *split2 = NULL; |
467 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 468 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
468 | u64 len = end - start + 1; | 469 | u64 len = end - start + 1; |
470 | u64 gen; | ||
469 | int ret; | 471 | int ret; |
470 | int testend = 1; | 472 | int testend = 1; |
471 | unsigned long flags; | 473 | unsigned long flags; |
@@ -477,11 +479,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
477 | testend = 0; | 479 | testend = 0; |
478 | } | 480 | } |
479 | while (1) { | 481 | while (1) { |
482 | int no_splits = 0; | ||
483 | |||
480 | if (!split) | 484 | if (!split) |
481 | split = alloc_extent_map(); | 485 | split = alloc_extent_map(); |
482 | if (!split2) | 486 | if (!split2) |
483 | split2 = alloc_extent_map(); | 487 | split2 = alloc_extent_map(); |
484 | BUG_ON(!split || !split2); /* -ENOMEM */ | 488 | if (!split || !split2) |
489 | no_splits = 1; | ||
485 | 490 | ||
486 | write_lock(&em_tree->lock); | 491 | write_lock(&em_tree->lock); |
487 | em = lookup_extent_mapping(em_tree, start, len); | 492 | em = lookup_extent_mapping(em_tree, start, len); |
@@ -490,6 +495,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
490 | break; | 495 | break; |
491 | } | 496 | } |
492 | flags = em->flags; | 497 | flags = em->flags; |
498 | gen = em->generation; | ||
493 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { | 499 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { |
494 | if (testend && em->start + em->len >= start + len) { | 500 | if (testend && em->start + em->len >= start + len) { |
495 | free_extent_map(em); | 501 | free_extent_map(em); |
@@ -506,6 +512,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
506 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 512 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
507 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 513 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
508 | remove_extent_mapping(em_tree, em); | 514 | remove_extent_mapping(em_tree, em); |
515 | if (no_splits) | ||
516 | goto next; | ||
509 | 517 | ||
510 | if (em->block_start < EXTENT_MAP_LAST_BYTE && | 518 | if (em->block_start < EXTENT_MAP_LAST_BYTE && |
511 | em->start < start) { | 519 | em->start < start) { |
@@ -518,12 +526,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
518 | split->block_len = em->block_len; | 526 | split->block_len = em->block_len; |
519 | else | 527 | else |
520 | split->block_len = split->len; | 528 | split->block_len = split->len; |
521 | 529 | split->generation = gen; | |
522 | split->bdev = em->bdev; | 530 | split->bdev = em->bdev; |
523 | split->flags = flags; | 531 | split->flags = flags; |
524 | split->compress_type = em->compress_type; | 532 | split->compress_type = em->compress_type; |
525 | ret = add_extent_mapping(em_tree, split); | 533 | ret = add_extent_mapping(em_tree, split); |
526 | BUG_ON(ret); /* Logic error */ | 534 | BUG_ON(ret); /* Logic error */ |
535 | list_move(&split->list, &em_tree->modified_extents); | ||
527 | free_extent_map(split); | 536 | free_extent_map(split); |
528 | split = split2; | 537 | split = split2; |
529 | split2 = NULL; | 538 | split2 = NULL; |
@@ -537,6 +546,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
537 | split->bdev = em->bdev; | 546 | split->bdev = em->bdev; |
538 | split->flags = flags; | 547 | split->flags = flags; |
539 | split->compress_type = em->compress_type; | 548 | split->compress_type = em->compress_type; |
549 | split->generation = gen; | ||
540 | 550 | ||
541 | if (compressed) { | 551 | if (compressed) { |
542 | split->block_len = em->block_len; | 552 | split->block_len = em->block_len; |
@@ -550,9 +560,11 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
550 | 560 | ||
551 | ret = add_extent_mapping(em_tree, split); | 561 | ret = add_extent_mapping(em_tree, split); |
552 | BUG_ON(ret); /* Logic error */ | 562 | BUG_ON(ret); /* Logic error */ |
563 | list_move(&split->list, &em_tree->modified_extents); | ||
553 | free_extent_map(split); | 564 | free_extent_map(split); |
554 | split = NULL; | 565 | split = NULL; |
555 | } | 566 | } |
567 | next: | ||
556 | write_unlock(&em_tree->lock); | 568 | write_unlock(&em_tree->lock); |
557 | 569 | ||
558 | /* once for us */ | 570 | /* once for us */ |
@@ -564,7 +576,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
564 | free_extent_map(split); | 576 | free_extent_map(split); |
565 | if (split2) | 577 | if (split2) |
566 | free_extent_map(split2); | 578 | free_extent_map(split2); |
567 | return 0; | ||
568 | } | 579 | } |
569 | 580 | ||
570 | /* | 581 | /* |
@@ -576,13 +587,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
576 | * it is either truncated or split. Anything entirely inside the range | 587 | * it is either truncated or split. Anything entirely inside the range |
577 | * is deleted from the tree. | 588 | * is deleted from the tree. |
578 | */ | 589 | */ |
579 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, | 590 | int __btrfs_drop_extents(struct btrfs_trans_handle *trans, |
580 | u64 start, u64 end, u64 *hint_byte, int drop_cache) | 591 | struct btrfs_root *root, struct inode *inode, |
592 | struct btrfs_path *path, u64 start, u64 end, | ||
593 | u64 *drop_end, int drop_cache) | ||
581 | { | 594 | { |
582 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
583 | struct extent_buffer *leaf; | 595 | struct extent_buffer *leaf; |
584 | struct btrfs_file_extent_item *fi; | 596 | struct btrfs_file_extent_item *fi; |
585 | struct btrfs_path *path; | ||
586 | struct btrfs_key key; | 597 | struct btrfs_key key; |
587 | struct btrfs_key new_key; | 598 | struct btrfs_key new_key; |
588 | u64 ino = btrfs_ino(inode); | 599 | u64 ino = btrfs_ino(inode); |
@@ -597,14 +608,12 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, | |||
597 | int recow; | 608 | int recow; |
598 | int ret; | 609 | int ret; |
599 | int modify_tree = -1; | 610 | int modify_tree = -1; |
611 | int update_refs = (root->ref_cows || root == root->fs_info->tree_root); | ||
612 | int found = 0; | ||
600 | 613 | ||
601 | if (drop_cache) | 614 | if (drop_cache) |
602 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 615 | btrfs_drop_extent_cache(inode, start, end - 1, 0); |
603 | 616 | ||
604 | path = btrfs_alloc_path(); | ||
605 | if (!path) | ||
606 | return -ENOMEM; | ||
607 | |||
608 | if (start >= BTRFS_I(inode)->disk_i_size) | 617 | if (start >= BTRFS_I(inode)->disk_i_size) |
609 | modify_tree = 0; | 618 | modify_tree = 0; |
610 | 619 | ||
@@ -666,6 +675,7 @@ next_slot: | |||
666 | goto next_slot; | 675 | goto next_slot; |
667 | } | 676 | } |
668 | 677 | ||
678 | found = 1; | ||
669 | search_start = max(key.offset, start); | 679 | search_start = max(key.offset, start); |
670 | if (recow || !modify_tree) { | 680 | if (recow || !modify_tree) { |
671 | modify_tree = -1; | 681 | modify_tree = -1; |
@@ -707,14 +717,13 @@ next_slot: | |||
707 | extent_end - start); | 717 | extent_end - start); |
708 | btrfs_mark_buffer_dirty(leaf); | 718 | btrfs_mark_buffer_dirty(leaf); |
709 | 719 | ||
710 | if (disk_bytenr > 0) { | 720 | if (update_refs && disk_bytenr > 0) { |
711 | ret = btrfs_inc_extent_ref(trans, root, | 721 | ret = btrfs_inc_extent_ref(trans, root, |
712 | disk_bytenr, num_bytes, 0, | 722 | disk_bytenr, num_bytes, 0, |
713 | root->root_key.objectid, | 723 | root->root_key.objectid, |
714 | new_key.objectid, | 724 | new_key.objectid, |
715 | start - extent_offset, 0); | 725 | start - extent_offset, 0); |
716 | BUG_ON(ret); /* -ENOMEM */ | 726 | BUG_ON(ret); /* -ENOMEM */ |
717 | *hint_byte = disk_bytenr; | ||
718 | } | 727 | } |
719 | key.offset = start; | 728 | key.offset = start; |
720 | } | 729 | } |
@@ -734,10 +743,8 @@ next_slot: | |||
734 | btrfs_set_file_extent_num_bytes(leaf, fi, | 743 | btrfs_set_file_extent_num_bytes(leaf, fi, |
735 | extent_end - end); | 744 | extent_end - end); |
736 | btrfs_mark_buffer_dirty(leaf); | 745 | btrfs_mark_buffer_dirty(leaf); |
737 | if (disk_bytenr > 0) { | 746 | if (update_refs && disk_bytenr > 0) |
738 | inode_sub_bytes(inode, end - key.offset); | 747 | inode_sub_bytes(inode, end - key.offset); |
739 | *hint_byte = disk_bytenr; | ||
740 | } | ||
741 | break; | 748 | break; |
742 | } | 749 | } |
743 | 750 | ||
@@ -753,10 +760,8 @@ next_slot: | |||
753 | btrfs_set_file_extent_num_bytes(leaf, fi, | 760 | btrfs_set_file_extent_num_bytes(leaf, fi, |
754 | start - key.offset); | 761 | start - key.offset); |
755 | btrfs_mark_buffer_dirty(leaf); | 762 | btrfs_mark_buffer_dirty(leaf); |
756 | if (disk_bytenr > 0) { | 763 | if (update_refs && disk_bytenr > 0) |
757 | inode_sub_bytes(inode, extent_end - start); | 764 | inode_sub_bytes(inode, extent_end - start); |
758 | *hint_byte = disk_bytenr; | ||
759 | } | ||
760 | if (end == extent_end) | 765 | if (end == extent_end) |
761 | break; | 766 | break; |
762 | 767 | ||
@@ -777,12 +782,13 @@ next_slot: | |||
777 | del_nr++; | 782 | del_nr++; |
778 | } | 783 | } |
779 | 784 | ||
780 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | 785 | if (update_refs && |
786 | extent_type == BTRFS_FILE_EXTENT_INLINE) { | ||
781 | inode_sub_bytes(inode, | 787 | inode_sub_bytes(inode, |
782 | extent_end - key.offset); | 788 | extent_end - key.offset); |
783 | extent_end = ALIGN(extent_end, | 789 | extent_end = ALIGN(extent_end, |
784 | root->sectorsize); | 790 | root->sectorsize); |
785 | } else if (disk_bytenr > 0) { | 791 | } else if (update_refs && disk_bytenr > 0) { |
786 | ret = btrfs_free_extent(trans, root, | 792 | ret = btrfs_free_extent(trans, root, |
787 | disk_bytenr, num_bytes, 0, | 793 | disk_bytenr, num_bytes, 0, |
788 | root->root_key.objectid, | 794 | root->root_key.objectid, |
@@ -791,7 +797,6 @@ next_slot: | |||
791 | BUG_ON(ret); /* -ENOMEM */ | 797 | BUG_ON(ret); /* -ENOMEM */ |
792 | inode_sub_bytes(inode, | 798 | inode_sub_bytes(inode, |
793 | extent_end - key.offset); | 799 | extent_end - key.offset); |
794 | *hint_byte = disk_bytenr; | ||
795 | } | 800 | } |
796 | 801 | ||
797 | if (end == extent_end) | 802 | if (end == extent_end) |
@@ -806,7 +811,7 @@ next_slot: | |||
806 | del_nr); | 811 | del_nr); |
807 | if (ret) { | 812 | if (ret) { |
808 | btrfs_abort_transaction(trans, root, ret); | 813 | btrfs_abort_transaction(trans, root, ret); |
809 | goto out; | 814 | break; |
810 | } | 815 | } |
811 | 816 | ||
812 | del_nr = 0; | 817 | del_nr = 0; |
@@ -825,7 +830,24 @@ next_slot: | |||
825 | btrfs_abort_transaction(trans, root, ret); | 830 | btrfs_abort_transaction(trans, root, ret); |
826 | } | 831 | } |
827 | 832 | ||
828 | out: | 833 | if (drop_end) |
834 | *drop_end = found ? min(end, extent_end) : end; | ||
835 | btrfs_release_path(path); | ||
836 | return ret; | ||
837 | } | ||
838 | |||
839 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | ||
840 | struct btrfs_root *root, struct inode *inode, u64 start, | ||
841 | u64 end, int drop_cache) | ||
842 | { | ||
843 | struct btrfs_path *path; | ||
844 | int ret; | ||
845 | |||
846 | path = btrfs_alloc_path(); | ||
847 | if (!path) | ||
848 | return -ENOMEM; | ||
849 | ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL, | ||
850 | drop_cache); | ||
829 | btrfs_free_path(path); | 851 | btrfs_free_path(path); |
830 | return ret; | 852 | return ret; |
831 | } | 853 | } |
@@ -892,8 +914,6 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | |||
892 | int ret; | 914 | int ret; |
893 | u64 ino = btrfs_ino(inode); | 915 | u64 ino = btrfs_ino(inode); |
894 | 916 | ||
895 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | ||
896 | |||
897 | path = btrfs_alloc_path(); | 917 | path = btrfs_alloc_path(); |
898 | if (!path) | 918 | if (!path) |
899 | return -ENOMEM; | 919 | return -ENOMEM; |
@@ -935,12 +955,16 @@ again: | |||
935 | btrfs_set_item_key_safe(trans, root, path, &new_key); | 955 | btrfs_set_item_key_safe(trans, root, path, &new_key); |
936 | fi = btrfs_item_ptr(leaf, path->slots[0], | 956 | fi = btrfs_item_ptr(leaf, path->slots[0], |
937 | struct btrfs_file_extent_item); | 957 | struct btrfs_file_extent_item); |
958 | btrfs_set_file_extent_generation(leaf, fi, | ||
959 | trans->transid); | ||
938 | btrfs_set_file_extent_num_bytes(leaf, fi, | 960 | btrfs_set_file_extent_num_bytes(leaf, fi, |
939 | extent_end - end); | 961 | extent_end - end); |
940 | btrfs_set_file_extent_offset(leaf, fi, | 962 | btrfs_set_file_extent_offset(leaf, fi, |
941 | end - orig_offset); | 963 | end - orig_offset); |
942 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, | 964 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, |
943 | struct btrfs_file_extent_item); | 965 | struct btrfs_file_extent_item); |
966 | btrfs_set_file_extent_generation(leaf, fi, | ||
967 | trans->transid); | ||
944 | btrfs_set_file_extent_num_bytes(leaf, fi, | 968 | btrfs_set_file_extent_num_bytes(leaf, fi, |
945 | end - other_start); | 969 | end - other_start); |
946 | btrfs_mark_buffer_dirty(leaf); | 970 | btrfs_mark_buffer_dirty(leaf); |
@@ -958,12 +982,16 @@ again: | |||
958 | struct btrfs_file_extent_item); | 982 | struct btrfs_file_extent_item); |
959 | btrfs_set_file_extent_num_bytes(leaf, fi, | 983 | btrfs_set_file_extent_num_bytes(leaf, fi, |
960 | start - key.offset); | 984 | start - key.offset); |
985 | btrfs_set_file_extent_generation(leaf, fi, | ||
986 | trans->transid); | ||
961 | path->slots[0]++; | 987 | path->slots[0]++; |
962 | new_key.offset = start; | 988 | new_key.offset = start; |
963 | btrfs_set_item_key_safe(trans, root, path, &new_key); | 989 | btrfs_set_item_key_safe(trans, root, path, &new_key); |
964 | 990 | ||
965 | fi = btrfs_item_ptr(leaf, path->slots[0], | 991 | fi = btrfs_item_ptr(leaf, path->slots[0], |
966 | struct btrfs_file_extent_item); | 992 | struct btrfs_file_extent_item); |
993 | btrfs_set_file_extent_generation(leaf, fi, | ||
994 | trans->transid); | ||
967 | btrfs_set_file_extent_num_bytes(leaf, fi, | 995 | btrfs_set_file_extent_num_bytes(leaf, fi, |
968 | other_end - start); | 996 | other_end - start); |
969 | btrfs_set_file_extent_offset(leaf, fi, | 997 | btrfs_set_file_extent_offset(leaf, fi, |
@@ -991,12 +1019,14 @@ again: | |||
991 | leaf = path->nodes[0]; | 1019 | leaf = path->nodes[0]; |
992 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, | 1020 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, |
993 | struct btrfs_file_extent_item); | 1021 | struct btrfs_file_extent_item); |
1022 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | ||
994 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1023 | btrfs_set_file_extent_num_bytes(leaf, fi, |
995 | split - key.offset); | 1024 | split - key.offset); |
996 | 1025 | ||
997 | fi = btrfs_item_ptr(leaf, path->slots[0], | 1026 | fi = btrfs_item_ptr(leaf, path->slots[0], |
998 | struct btrfs_file_extent_item); | 1027 | struct btrfs_file_extent_item); |
999 | 1028 | ||
1029 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | ||
1000 | btrfs_set_file_extent_offset(leaf, fi, split - orig_offset); | 1030 | btrfs_set_file_extent_offset(leaf, fi, split - orig_offset); |
1001 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1031 | btrfs_set_file_extent_num_bytes(leaf, fi, |
1002 | extent_end - split); | 1032 | extent_end - split); |
@@ -1056,12 +1086,14 @@ again: | |||
1056 | struct btrfs_file_extent_item); | 1086 | struct btrfs_file_extent_item); |
1057 | btrfs_set_file_extent_type(leaf, fi, | 1087 | btrfs_set_file_extent_type(leaf, fi, |
1058 | BTRFS_FILE_EXTENT_REG); | 1088 | BTRFS_FILE_EXTENT_REG); |
1089 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | ||
1059 | btrfs_mark_buffer_dirty(leaf); | 1090 | btrfs_mark_buffer_dirty(leaf); |
1060 | } else { | 1091 | } else { |
1061 | fi = btrfs_item_ptr(leaf, del_slot - 1, | 1092 | fi = btrfs_item_ptr(leaf, del_slot - 1, |
1062 | struct btrfs_file_extent_item); | 1093 | struct btrfs_file_extent_item); |
1063 | btrfs_set_file_extent_type(leaf, fi, | 1094 | btrfs_set_file_extent_type(leaf, fi, |
1064 | BTRFS_FILE_EXTENT_REG); | 1095 | BTRFS_FILE_EXTENT_REG); |
1096 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | ||
1065 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1097 | btrfs_set_file_extent_num_bytes(leaf, fi, |
1066 | extent_end - key.offset); | 1098 | extent_end - key.offset); |
1067 | btrfs_mark_buffer_dirty(leaf); | 1099 | btrfs_mark_buffer_dirty(leaf); |
@@ -1173,8 +1205,8 @@ again: | |||
1173 | 1205 | ||
1174 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, | 1206 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, |
1175 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | | 1207 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | |
1176 | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, | 1208 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, |
1177 | GFP_NOFS); | 1209 | 0, 0, &cached_state, GFP_NOFS); |
1178 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | 1210 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
1179 | start_pos, last_pos - 1, &cached_state, | 1211 | start_pos, last_pos - 1, &cached_state, |
1180 | GFP_NOFS); | 1212 | GFP_NOFS); |
@@ -1514,16 +1546,24 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1514 | 1546 | ||
1515 | trace_btrfs_sync_file(file, datasync); | 1547 | trace_btrfs_sync_file(file, datasync); |
1516 | 1548 | ||
1549 | /* | ||
1550 | * We write the dirty pages in the range and wait for them to complete | |
1551 | * outside of the ->i_mutex. That way, multiple tasks can flush dirty | |
1552 | * pages concurrently, which improves performance. | |
1553 | */ | ||
1554 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | ||
1555 | if (ret) | ||
1556 | return ret; | ||
1557 | |||
1517 | mutex_lock(&inode->i_mutex); | 1558 | mutex_lock(&inode->i_mutex); |
1518 | 1559 | ||
1519 | /* | 1560 | /* |
1520 | * we wait first, since the writeback may change the inode, also wait | 1561 | * We flush the dirty pages again so that no dirty pages in the |
1521 | * ordered range does a filemape_write_and_wait_range which is why we | 1562 | * range are left behind. |
1522 | * don't do it above like other file systems. | ||
1523 | */ | 1563 | */ |
1524 | root->log_batch++; | 1564 | atomic_inc(&root->log_batch); |
1525 | btrfs_wait_ordered_range(inode, start, end); | 1565 | btrfs_wait_ordered_range(inode, start, end); |
1526 | root->log_batch++; | 1566 | atomic_inc(&root->log_batch); |
1527 | 1567 | ||
1528 | /* | 1568 | /* |
1529 | * check the transaction that last modified this inode | 1569 | * check the transaction that last modified this inode |
@@ -1544,6 +1584,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1544 | BTRFS_I(inode)->last_trans <= | 1584 | BTRFS_I(inode)->last_trans <= |
1545 | root->fs_info->last_trans_committed) { | 1585 | root->fs_info->last_trans_committed) { |
1546 | BTRFS_I(inode)->last_trans = 0; | 1586 | BTRFS_I(inode)->last_trans = 0; |
1587 | |||
1588 | /* | ||
1589 | * We've had everything committed since the last time we were | |
1590 | * modified so clear this flag in case it was set for whatever | ||
1591 | * reason, it's no longer relevant. | ||
1592 | */ | ||
1593 | clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
1594 | &BTRFS_I(inode)->runtime_flags); | ||
1547 | mutex_unlock(&inode->i_mutex); | 1595 | mutex_unlock(&inode->i_mutex); |
1548 | goto out; | 1596 | goto out; |
1549 | } | 1597 | } |
@@ -1615,6 +1663,324 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | |||
1615 | return 0; | 1663 | return 0; |
1616 | } | 1664 | } |
1617 | 1665 | ||
1666 | static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf, | ||
1667 | int slot, u64 start, u64 end) | ||
1668 | { | ||
1669 | struct btrfs_file_extent_item *fi; | ||
1670 | struct btrfs_key key; | ||
1671 | |||
1672 | if (slot < 0 || slot >= btrfs_header_nritems(leaf)) | ||
1673 | return 0; | ||
1674 | |||
1675 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
1676 | if (key.objectid != btrfs_ino(inode) || | ||
1677 | key.type != BTRFS_EXTENT_DATA_KEY) | ||
1678 | return 0; | ||
1679 | |||
1680 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | ||
1681 | |||
1682 | if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) | ||
1683 | return 0; | ||
1684 | |||
1685 | if (btrfs_file_extent_disk_bytenr(leaf, fi)) | ||
1686 | return 0; | ||
1687 | |||
1688 | if (key.offset == end) | ||
1689 | return 1; | ||
1690 | if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start) | ||
1691 | return 1; | ||
1692 | return 0; | ||
1693 | } | ||
1694 | |||
1695 | static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, | ||
1696 | struct btrfs_path *path, u64 offset, u64 end) | ||
1697 | { | ||
1698 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1699 | struct extent_buffer *leaf; | ||
1700 | struct btrfs_file_extent_item *fi; | ||
1701 | struct extent_map *hole_em; | ||
1702 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
1703 | struct btrfs_key key; | ||
1704 | int ret; | ||
1705 | |||
1706 | key.objectid = btrfs_ino(inode); | ||
1707 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
1708 | key.offset = offset; | ||
1709 | |||
1710 | |||
1711 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
1712 | if (ret < 0) | ||
1713 | return ret; | ||
1714 | BUG_ON(!ret); | ||
1715 | |||
1716 | leaf = path->nodes[0]; | ||
1717 | if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) { | ||
1718 | u64 num_bytes; | ||
1719 | |||
1720 | path->slots[0]--; | ||
1721 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
1722 | struct btrfs_file_extent_item); | ||
1723 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + | ||
1724 | end - offset; | ||
1725 | btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); | ||
1726 | btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); | ||
1727 | btrfs_set_file_extent_offset(leaf, fi, 0); | ||
1728 | btrfs_mark_buffer_dirty(leaf); | ||
1729 | goto out; | ||
1730 | } | ||
1731 | |||
1732 | if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) { | ||
1733 | u64 num_bytes; | ||
1734 | |||
1735 | path->slots[0]++; | ||
1736 | key.offset = offset; | ||
1737 | btrfs_set_item_key_safe(trans, root, path, &key); | ||
1738 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
1739 | struct btrfs_file_extent_item); | ||
1740 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end - | ||
1741 | offset; | ||
1742 | btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); | ||
1743 | btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); | ||
1744 | btrfs_set_file_extent_offset(leaf, fi, 0); | ||
1745 | btrfs_mark_buffer_dirty(leaf); | ||
1746 | goto out; | ||
1747 | } | ||
1748 | btrfs_release_path(path); | ||
1749 | |||
1750 | ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset, | ||
1751 | 0, 0, end - offset, 0, end - offset, | ||
1752 | 0, 0, 0); | ||
1753 | if (ret) | ||
1754 | return ret; | ||
1755 | |||
1756 | out: | ||
1757 | btrfs_release_path(path); | ||
1758 | |||
1759 | hole_em = alloc_extent_map(); | ||
1760 | if (!hole_em) { | ||
1761 | btrfs_drop_extent_cache(inode, offset, end - 1, 0); | ||
1762 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
1763 | &BTRFS_I(inode)->runtime_flags); | ||
1764 | } else { | ||
1765 | hole_em->start = offset; | ||
1766 | hole_em->len = end - offset; | ||
1767 | hole_em->orig_start = offset; | ||
1768 | |||
1769 | hole_em->block_start = EXTENT_MAP_HOLE; | ||
1770 | hole_em->block_len = 0; | ||
1771 | hole_em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
1772 | hole_em->compress_type = BTRFS_COMPRESS_NONE; | ||
1773 | hole_em->generation = trans->transid; | ||
1774 | |||
1775 | do { | ||
1776 | btrfs_drop_extent_cache(inode, offset, end - 1, 0); | ||
1777 | write_lock(&em_tree->lock); | ||
1778 | ret = add_extent_mapping(em_tree, hole_em); | ||
1779 | if (!ret) | ||
1780 | list_move(&hole_em->list, | ||
1781 | &em_tree->modified_extents); | ||
1782 | write_unlock(&em_tree->lock); | ||
1783 | } while (ret == -EEXIST); | ||
1784 | free_extent_map(hole_em); | ||
1785 | if (ret) | ||
1786 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
1787 | &BTRFS_I(inode)->runtime_flags); | ||
1788 | } | ||
1789 | |||
1790 | return 0; | ||
1791 | } | ||
1792 | |||
1793 | static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | ||
1794 | { | ||
1795 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1796 | struct extent_state *cached_state = NULL; | ||
1797 | struct btrfs_path *path; | ||
1798 | struct btrfs_block_rsv *rsv; | ||
1799 | struct btrfs_trans_handle *trans; | ||
1800 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
1801 | u64 lockstart = (offset + mask) & ~mask; | ||
1802 | u64 lockend = ((offset + len) & ~mask) - 1; | ||
1803 | u64 cur_offset = lockstart; | ||
1804 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | ||
1805 | u64 drop_end; | ||
1806 | unsigned long nr; | ||
1807 | int ret = 0; | ||
1808 | int err = 0; | ||
1809 | bool same_page = (offset >> PAGE_CACHE_SHIFT) == | ||
1810 | ((offset + len) >> PAGE_CACHE_SHIFT); | ||
1811 | |||
1812 | btrfs_wait_ordered_range(inode, offset, len); | ||
1813 | |||
1814 | mutex_lock(&inode->i_mutex); | ||
1815 | if (offset >= inode->i_size) { | ||
1816 | mutex_unlock(&inode->i_mutex); | ||
1817 | return 0; | ||
1818 | } | ||
1819 | |||
1820 | /* | ||
1821 | * Only do this if we are in the same page and we aren't doing the | ||
1822 | * entire page. | ||
1823 | */ | ||
1824 | if (same_page && len < PAGE_CACHE_SIZE) { | ||
1825 | ret = btrfs_truncate_page(inode, offset, len, 0); | ||
1826 | mutex_unlock(&inode->i_mutex); | ||
1827 | return ret; | ||
1828 | } | ||
1829 | |||
1830 | /* zero back part of the first page */ | ||
1831 | ret = btrfs_truncate_page(inode, offset, 0, 0); | ||
1832 | if (ret) { | ||
1833 | mutex_unlock(&inode->i_mutex); | ||
1834 | return ret; | ||
1835 | } | ||
1836 | |||
1837 | /* zero the front end of the last page */ | ||
1838 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); | ||
1839 | if (ret) { | ||
1840 | mutex_unlock(&inode->i_mutex); | ||
1841 | return ret; | ||
1842 | } | ||
1843 | |||
1844 | if (lockend < lockstart) { | ||
1845 | mutex_unlock(&inode->i_mutex); | ||
1846 | return 0; | ||
1847 | } | ||
1848 | |||
1849 | while (1) { | ||
1850 | struct btrfs_ordered_extent *ordered; | ||
1851 | |||
1852 | truncate_pagecache_range(inode, lockstart, lockend); | ||
1853 | |||
1854 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
1855 | 0, &cached_state); | ||
1856 | ordered = btrfs_lookup_first_ordered_extent(inode, lockend); | ||
1857 | |||
1858 | /* | ||
1859 | * We need to make sure we have no ordered extents in this range | ||
1860 | * and nobody raced in and read a page in this range, if we did | ||
1861 | * we need to try again. | ||
1862 | */ | ||
1863 | if ((!ordered || | ||
1864 | (ordered->file_offset + ordered->len < lockstart || | ||
1865 | ordered->file_offset > lockend)) && | ||
1866 | !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
1867 | lockend, EXTENT_UPTODATE, 0, | ||
1868 | cached_state)) { | ||
1869 | if (ordered) | ||
1870 | btrfs_put_ordered_extent(ordered); | ||
1871 | break; | ||
1872 | } | ||
1873 | if (ordered) | ||
1874 | btrfs_put_ordered_extent(ordered); | ||
1875 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, | ||
1876 | lockend, &cached_state, GFP_NOFS); | ||
1877 | btrfs_wait_ordered_range(inode, lockstart, | ||
1878 | lockend - lockstart + 1); | ||
1879 | } | ||
1880 | |||
1881 | path = btrfs_alloc_path(); | ||
1882 | if (!path) { | ||
1883 | ret = -ENOMEM; | ||
1884 | goto out; | ||
1885 | } | ||
1886 | |||
1887 | rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); | ||
1888 | if (!rsv) { | ||
1889 | ret = -ENOMEM; | ||
1890 | goto out_free; | ||
1891 | } | ||
1892 | rsv->size = btrfs_calc_trunc_metadata_size(root, 1); | ||
1893 | rsv->failfast = 1; | ||
1894 | |||
1895 | /* | ||
1896 | * 1 - update the inode | ||
1897 | * 1 - removing the extents in the range | ||
1898 | * 1 - adding the hole extent | ||
1899 | */ | ||
1900 | trans = btrfs_start_transaction(root, 3); | ||
1901 | if (IS_ERR(trans)) { | ||
1902 | err = PTR_ERR(trans); | ||
1903 | goto out_free; | ||
1904 | } | ||
1905 | |||
1906 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, | ||
1907 | min_size); | ||
1908 | BUG_ON(ret); | ||
1909 | trans->block_rsv = rsv; | ||
1910 | |||
1911 | while (cur_offset < lockend) { | ||
1912 | ret = __btrfs_drop_extents(trans, root, inode, path, | ||
1913 | cur_offset, lockend + 1, | ||
1914 | &drop_end, 1); | ||
1915 | if (ret != -ENOSPC) | ||
1916 | break; | ||
1917 | |||
1918 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
1919 | |||
1920 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | ||
1921 | if (ret) { | ||
1922 | err = ret; | ||
1923 | break; | ||
1924 | } | ||
1925 | |||
1926 | cur_offset = drop_end; | ||
1927 | |||
1928 | ret = btrfs_update_inode(trans, root, inode); | ||
1929 | if (ret) { | ||
1930 | err = ret; | ||
1931 | break; | ||
1932 | } | ||
1933 | |||
1934 | nr = trans->blocks_used; | ||
1935 | btrfs_end_transaction(trans, root); | ||
1936 | btrfs_btree_balance_dirty(root, nr); | ||
1937 | |||
1938 | trans = btrfs_start_transaction(root, 3); | ||
1939 | if (IS_ERR(trans)) { | ||
1940 | ret = PTR_ERR(trans); | ||
1941 | trans = NULL; | ||
1942 | break; | ||
1943 | } | ||
1944 | |||
1945 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, | ||
1946 | rsv, min_size); | ||
1947 | BUG_ON(ret); /* shouldn't happen */ | ||
1948 | trans->block_rsv = rsv; | ||
1949 | } | ||
1950 | |||
1951 | if (ret) { | ||
1952 | err = ret; | ||
1953 | goto out_trans; | ||
1954 | } | ||
1955 | |||
1956 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
1957 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | ||
1958 | if (ret) { | ||
1959 | err = ret; | ||
1960 | goto out_trans; | ||
1961 | } | ||
1962 | |||
1963 | out_trans: | ||
1964 | if (!trans) | ||
1965 | goto out_free; | ||
1966 | |||
1967 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
1968 | ret = btrfs_update_inode(trans, root, inode); | ||
1969 | nr = trans->blocks_used; | ||
1970 | btrfs_end_transaction(trans, root); | ||
1971 | btrfs_btree_balance_dirty(root, nr); | ||
1972 | out_free: | ||
1973 | btrfs_free_path(path); | ||
1974 | btrfs_free_block_rsv(root, rsv); | ||
1975 | out: | ||
1976 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
1977 | &cached_state, GFP_NOFS); | ||
1978 | mutex_unlock(&inode->i_mutex); | ||
1979 | if (ret && !err) | ||
1980 | err = ret; | ||
1981 | return err; | ||
1982 | } | ||
1983 | |||
1618 | static long btrfs_fallocate(struct file *file, int mode, | 1984 | static long btrfs_fallocate(struct file *file, int mode, |
1619 | loff_t offset, loff_t len) | 1985 | loff_t offset, loff_t len) |
1620 | { | 1986 | { |
@@ -1633,15 +1999,18 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1633 | alloc_start = offset & ~mask; | 1999 | alloc_start = offset & ~mask; |
1634 | alloc_end = (offset + len + mask) & ~mask; | 2000 | alloc_end = (offset + len + mask) & ~mask; |
1635 | 2001 | ||
1636 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | 2002 | /* Make sure we aren't being given some crap mode */ |
1637 | if (mode & ~FALLOC_FL_KEEP_SIZE) | 2003 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
1638 | return -EOPNOTSUPP; | 2004 | return -EOPNOTSUPP; |
1639 | 2005 | ||
2006 | if (mode & FALLOC_FL_PUNCH_HOLE) | ||
2007 | return btrfs_punch_hole(inode, offset, len); | ||
2008 | |||
1640 | /* | 2009 | /* |
1641 | * Make sure we have enough space before we do the | 2010 | * Make sure we have enough space before we do the |
1642 | * allocation. | 2011 | * allocation. |
1643 | */ | 2012 | */ |
1644 | ret = btrfs_check_data_free_space(inode, len); | 2013 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start + 1); |
1645 | if (ret) | 2014 | if (ret) |
1646 | return ret; | 2015 | return ret; |
1647 | 2016 | ||
@@ -1748,7 +2117,7 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1748 | out: | 2117 | out: |
1749 | mutex_unlock(&inode->i_mutex); | 2118 | mutex_unlock(&inode->i_mutex); |
1750 | /* Let go of our reservation. */ | 2119 | /* Let go of our reservation. */ |
1751 | btrfs_free_reserved_data_space(inode, len); | 2120 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start + 1); |
1752 | return ret; | 2121 | return ret; |
1753 | } | 2122 | } |
1754 | 2123 | ||
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6b10acfc2f5c..1027b854b90c 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -966,7 +966,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
966 | block_group->key.offset)) { | 966 | block_group->key.offset)) { |
967 | ret = find_first_extent_bit(unpin, start, | 967 | ret = find_first_extent_bit(unpin, start, |
968 | &extent_start, &extent_end, | 968 | &extent_start, &extent_end, |
969 | EXTENT_DIRTY); | 969 | EXTENT_DIRTY, NULL); |
970 | if (ret) { | 970 | if (ret) { |
971 | ret = 0; | 971 | ret = 0; |
972 | break; | 972 | break; |
@@ -1454,9 +1454,7 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl, | |||
1454 | max_t(u64, *offset, bitmap_info->offset)); | 1454 | max_t(u64, *offset, bitmap_info->offset)); |
1455 | bits = bytes_to_bits(*bytes, ctl->unit); | 1455 | bits = bytes_to_bits(*bytes, ctl->unit); |
1456 | 1456 | ||
1457 | for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); | 1457 | for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { |
1458 | i < BITS_PER_BITMAP; | ||
1459 | i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) { | ||
1460 | next_zero = find_next_zero_bit(bitmap_info->bitmap, | 1458 | next_zero = find_next_zero_bit(bitmap_info->bitmap, |
1461 | BITS_PER_BITMAP, i); | 1459 | BITS_PER_BITMAP, i); |
1462 | if ((next_zero - i) >= bits) { | 1460 | if ((next_zero - i) >= bits) { |
@@ -2307,9 +2305,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, | |||
2307 | 2305 | ||
2308 | again: | 2306 | again: |
2309 | found_bits = 0; | 2307 | found_bits = 0; |
2310 | for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i); | 2308 | for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { |
2311 | i < BITS_PER_BITMAP; | ||
2312 | i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) { | ||
2313 | next_zero = find_next_zero_bit(entry->bitmap, | 2309 | next_zero = find_next_zero_bit(entry->bitmap, |
2314 | BITS_PER_BITMAP, i); | 2310 | BITS_PER_BITMAP, i); |
2315 | if (next_zero - i >= min_bits) { | 2311 | if (next_zero - i >= min_bits) { |
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index db2ff9773b99..1d982812ab67 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h | |||
@@ -24,4 +24,14 @@ static inline u64 btrfs_name_hash(const char *name, int len) | |||
24 | { | 24 | { |
25 | return crc32c((u32)~1, name, len); | 25 | return crc32c((u32)~1, name, len); |
26 | } | 26 | } |
27 | |||
28 | /* | ||
29 | * Figure the key offset of an extended inode ref | ||
30 | */ | ||
31 | static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name, | ||
32 | int len) | ||
33 | { | ||
34 | return (u64) crc32c(parent_objectid, name, len); | ||
35 | } | ||
36 | |||
27 | #endif | 37 | #endif |
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index a13cf1a96c73..48b8fda93132 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
@@ -18,6 +18,7 @@ | |||
18 | 18 | ||
19 | #include "ctree.h" | 19 | #include "ctree.h" |
20 | #include "disk-io.h" | 20 | #include "disk-io.h" |
21 | #include "hash.h" | ||
21 | #include "transaction.h" | 22 | #include "transaction.h" |
22 | #include "print-tree.h" | 23 | #include "print-tree.h" |
23 | 24 | ||
@@ -50,18 +51,57 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name, | |||
50 | return 0; | 51 | return 0; |
51 | } | 52 | } |
52 | 53 | ||
53 | struct btrfs_inode_ref * | 54 | int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid, |
55 | const char *name, int name_len, | ||
56 | struct btrfs_inode_extref **extref_ret) | ||
57 | { | ||
58 | struct extent_buffer *leaf; | ||
59 | struct btrfs_inode_extref *extref; | ||
60 | unsigned long ptr; | ||
61 | unsigned long name_ptr; | ||
62 | u32 item_size; | ||
63 | u32 cur_offset = 0; | ||
64 | int ref_name_len; | ||
65 | |||
66 | leaf = path->nodes[0]; | ||
67 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
68 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
69 | |||
70 | /* | ||
71 | * Search all extended backrefs in this item. We're only | ||
72 | * looking through any collisions so most of the time this is | ||
73 | * just going to compare against one buffer. If all is well, | ||
74 | * we'll return success and the inode ref object. | ||
75 | */ | ||
76 | while (cur_offset < item_size) { | ||
77 | extref = (struct btrfs_inode_extref *) (ptr + cur_offset); | ||
78 | name_ptr = (unsigned long)(&extref->name); | ||
79 | ref_name_len = btrfs_inode_extref_name_len(leaf, extref); | ||
80 | |||
81 | if (ref_name_len == name_len && | ||
82 | btrfs_inode_extref_parent(leaf, extref) == ref_objectid && | ||
83 | (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)) { | ||
84 | if (extref_ret) | ||
85 | *extref_ret = extref; | ||
86 | return 1; | ||
87 | } | ||
88 | |||
89 | cur_offset += ref_name_len + sizeof(*extref); | ||
90 | } | ||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | static struct btrfs_inode_ref * | ||
54 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | 95 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, |
55 | struct btrfs_root *root, | 96 | struct btrfs_root *root, |
56 | struct btrfs_path *path, | 97 | struct btrfs_path *path, |
57 | const char *name, int name_len, | 98 | const char *name, int name_len, |
58 | u64 inode_objectid, u64 ref_objectid, int mod) | 99 | u64 inode_objectid, u64 ref_objectid, int ins_len, |
100 | int cow) | ||
59 | { | 101 | { |
102 | int ret; | ||
60 | struct btrfs_key key; | 103 | struct btrfs_key key; |
61 | struct btrfs_inode_ref *ref; | 104 | struct btrfs_inode_ref *ref; |
62 | int ins_len = mod < 0 ? -1 : 0; | ||
63 | int cow = mod != 0; | ||
64 | int ret; | ||
65 | 105 | ||
66 | key.objectid = inode_objectid; | 106 | key.objectid = inode_objectid; |
67 | key.type = BTRFS_INODE_REF_KEY; | 107 | key.type = BTRFS_INODE_REF_KEY; |
@@ -77,13 +117,150 @@ btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | |||
77 | return ref; | 117 | return ref; |
78 | } | 118 | } |
79 | 119 | ||
80 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | 120 | /* Returns NULL if no extref found */ |
121 | struct btrfs_inode_extref * | ||
122 | btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, | ||
123 | struct btrfs_root *root, | ||
124 | struct btrfs_path *path, | ||
125 | const char *name, int name_len, | ||
126 | u64 inode_objectid, u64 ref_objectid, int ins_len, | ||
127 | int cow) | ||
128 | { | ||
129 | int ret; | ||
130 | struct btrfs_key key; | ||
131 | struct btrfs_inode_extref *extref; | ||
132 | |||
133 | key.objectid = inode_objectid; | ||
134 | key.type = BTRFS_INODE_EXTREF_KEY; | ||
135 | key.offset = btrfs_extref_hash(ref_objectid, name, name_len); | ||
136 | |||
137 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | ||
138 | if (ret < 0) | ||
139 | return ERR_PTR(ret); | ||
140 | if (ret > 0) | ||
141 | return NULL; | ||
142 | if (!btrfs_find_name_in_ext_backref(path, ref_objectid, name, name_len, &extref)) | ||
143 | return NULL; | ||
144 | return extref; | ||
145 | } | ||
146 | |||
147 | int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans, | ||
148 | struct btrfs_root *root, | ||
149 | struct btrfs_path *path, | ||
150 | const char *name, int name_len, | ||
151 | u64 inode_objectid, u64 ref_objectid, int mod, | ||
152 | u64 *ret_index) | ||
153 | { | ||
154 | struct btrfs_inode_ref *ref; | ||
155 | struct btrfs_inode_extref *extref; | ||
156 | int ins_len = mod < 0 ? -1 : 0; | ||
157 | int cow = mod != 0; | ||
158 | |||
159 | ref = btrfs_lookup_inode_ref(trans, root, path, name, name_len, | ||
160 | inode_objectid, ref_objectid, ins_len, | ||
161 | cow); | ||
162 | if (IS_ERR(ref)) | ||
163 | return PTR_ERR(ref); | ||
164 | |||
165 | if (ref != NULL) { | ||
166 | *ret_index = btrfs_inode_ref_index(path->nodes[0], ref); | ||
167 | return 0; | ||
168 | } | ||
169 | |||
170 | btrfs_release_path(path); | ||
171 | |||
172 | extref = btrfs_lookup_inode_extref(trans, root, path, name, | ||
173 | name_len, inode_objectid, | ||
174 | ref_objectid, ins_len, cow); | ||
175 | if (IS_ERR(extref)) | ||
176 | return PTR_ERR(extref); | ||
177 | |||
178 | if (extref) { | ||
179 | *ret_index = btrfs_inode_extref_index(path->nodes[0], extref); | ||
180 | return 0; | ||
181 | } | ||
182 | |||
183 | return -ENOENT; | ||
184 | } | ||
185 | |||
186 | int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, | ||
81 | struct btrfs_root *root, | 187 | struct btrfs_root *root, |
82 | const char *name, int name_len, | 188 | const char *name, int name_len, |
83 | u64 inode_objectid, u64 ref_objectid, u64 *index) | 189 | u64 inode_objectid, u64 ref_objectid, u64 *index) |
84 | { | 190 | { |
85 | struct btrfs_path *path; | 191 | struct btrfs_path *path; |
86 | struct btrfs_key key; | 192 | struct btrfs_key key; |
193 | struct btrfs_inode_extref *extref; | ||
194 | struct extent_buffer *leaf; | ||
195 | int ret; | ||
196 | int del_len = name_len + sizeof(*extref); | ||
197 | unsigned long ptr; | ||
198 | unsigned long item_start; | ||
199 | u32 item_size; | ||
200 | |||
201 | key.objectid = inode_objectid; | ||
202 | btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); | ||
203 | key.offset = btrfs_extref_hash(ref_objectid, name, name_len); | ||
204 | |||
205 | path = btrfs_alloc_path(); | ||
206 | if (!path) | ||
207 | return -ENOMEM; | ||
208 | |||
209 | path->leave_spinning = 1; | ||
210 | |||
211 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
212 | if (ret > 0) | ||
213 | ret = -ENOENT; | ||
214 | if (ret < 0) | ||
215 | goto out; | ||
216 | |||
217 | /* | ||
218 | * Sanity check - did we find the right item for this name? | ||
219 | * This should always succeed so error here will make the FS | ||
220 | * readonly. | ||
221 | */ | ||
222 | if (!btrfs_find_name_in_ext_backref(path, ref_objectid, | ||
223 | name, name_len, &extref)) { | ||
224 | btrfs_std_error(root->fs_info, -ENOENT); | ||
225 | ret = -EROFS; | ||
226 | goto out; | ||
227 | } | ||
228 | |||
229 | leaf = path->nodes[0]; | ||
230 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
231 | if (index) | ||
232 | *index = btrfs_inode_extref_index(leaf, extref); | ||
233 | |||
234 | if (del_len == item_size) { | ||
235 | /* | ||
236 | * Common case only one ref in the item, remove the | ||
237 | * whole item. | ||
238 | */ | ||
239 | ret = btrfs_del_item(trans, root, path); | ||
240 | goto out; | ||
241 | } | ||
242 | |||
243 | ptr = (unsigned long)extref; | ||
244 | item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
245 | |||
246 | memmove_extent_buffer(leaf, ptr, ptr + del_len, | ||
247 | item_size - (ptr + del_len - item_start)); | ||
248 | |||
249 | btrfs_truncate_item(trans, root, path, item_size - del_len, 1); | ||
250 | |||
251 | out: | ||
252 | btrfs_free_path(path); | ||
253 | |||
254 | return ret; | ||
255 | } | ||
256 | |||
257 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | ||
258 | struct btrfs_root *root, | ||
259 | const char *name, int name_len, | ||
260 | u64 inode_objectid, u64 ref_objectid, u64 *index) | ||
261 | { | ||
262 | struct btrfs_path *path; | ||
263 | struct btrfs_key key; | ||
87 | struct btrfs_inode_ref *ref; | 264 | struct btrfs_inode_ref *ref; |
88 | struct extent_buffer *leaf; | 265 | struct extent_buffer *leaf; |
89 | unsigned long ptr; | 266 | unsigned long ptr; |
@@ -91,6 +268,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
91 | u32 item_size; | 268 | u32 item_size; |
92 | u32 sub_item_len; | 269 | u32 sub_item_len; |
93 | int ret; | 270 | int ret; |
271 | int search_ext_refs = 0; | ||
94 | int del_len = name_len + sizeof(*ref); | 272 | int del_len = name_len + sizeof(*ref); |
95 | 273 | ||
96 | key.objectid = inode_objectid; | 274 | key.objectid = inode_objectid; |
@@ -106,12 +284,14 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
106 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 284 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
107 | if (ret > 0) { | 285 | if (ret > 0) { |
108 | ret = -ENOENT; | 286 | ret = -ENOENT; |
287 | search_ext_refs = 1; | ||
109 | goto out; | 288 | goto out; |
110 | } else if (ret < 0) { | 289 | } else if (ret < 0) { |
111 | goto out; | 290 | goto out; |
112 | } | 291 | } |
113 | if (!find_name_in_backref(path, name, name_len, &ref)) { | 292 | if (!find_name_in_backref(path, name, name_len, &ref)) { |
114 | ret = -ENOENT; | 293 | ret = -ENOENT; |
294 | search_ext_refs = 1; | ||
115 | goto out; | 295 | goto out; |
116 | } | 296 | } |
117 | leaf = path->nodes[0]; | 297 | leaf = path->nodes[0]; |
@@ -129,8 +309,78 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
129 | item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); | 309 | item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); |
130 | memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, | 310 | memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, |
131 | item_size - (ptr + sub_item_len - item_start)); | 311 | item_size - (ptr + sub_item_len - item_start)); |
132 | btrfs_truncate_item(trans, root, path, | 312 | btrfs_truncate_item(trans, root, path, item_size - sub_item_len, 1); |
133 | item_size - sub_item_len, 1); | 313 | out: |
314 | btrfs_free_path(path); | ||
315 | |||
316 | if (search_ext_refs) { | ||
317 | /* | ||
318 | * No refs were found, or we could not find the | ||
319 | * name in our ref array. Find and remove the extended | ||
320 | * inode ref then. | ||
321 | */ | ||
322 | return btrfs_del_inode_extref(trans, root, name, name_len, | ||
323 | inode_objectid, ref_objectid, index); | ||
324 | } | ||
325 | |||
326 | return ret; | ||
327 | } | ||
328 | |||
329 | /* | ||
330 | * btrfs_insert_inode_extref() - Inserts an extended inode ref into a tree. | ||
331 | * | ||
332 | * The caller must have checked against BTRFS_LINK_MAX already. | ||
333 | */ | ||
334 | static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, | ||
335 | struct btrfs_root *root, | ||
336 | const char *name, int name_len, | ||
337 | u64 inode_objectid, u64 ref_objectid, u64 index) | ||
338 | { | ||
339 | struct btrfs_inode_extref *extref; | ||
340 | int ret; | ||
341 | int ins_len = name_len + sizeof(*extref); | ||
342 | unsigned long ptr; | ||
343 | struct btrfs_path *path; | ||
344 | struct btrfs_key key; | ||
345 | struct extent_buffer *leaf; | ||
346 | struct btrfs_item *item; | ||
347 | |||
348 | key.objectid = inode_objectid; | ||
349 | key.type = BTRFS_INODE_EXTREF_KEY; | ||
350 | key.offset = btrfs_extref_hash(ref_objectid, name, name_len); | ||
351 | |||
352 | path = btrfs_alloc_path(); | ||
353 | if (!path) | ||
354 | return -ENOMEM; | ||
355 | |||
356 | path->leave_spinning = 1; | ||
357 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
358 | ins_len); | ||
359 | if (ret == -EEXIST) { | ||
360 | if (btrfs_find_name_in_ext_backref(path, ref_objectid, | ||
361 | name, name_len, NULL)) | ||
362 | goto out; | ||
363 | |||
364 | btrfs_extend_item(trans, root, path, ins_len); | ||
365 | ret = 0; | ||
366 | } | ||
367 | if (ret < 0) | ||
368 | goto out; | ||
369 | |||
370 | leaf = path->nodes[0]; | ||
371 | item = btrfs_item_nr(leaf, path->slots[0]); | ||
372 | ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char); | ||
373 | ptr += btrfs_item_size(leaf, item) - ins_len; | ||
374 | extref = (struct btrfs_inode_extref *)ptr; | ||
375 | |||
376 | btrfs_set_inode_extref_name_len(path->nodes[0], extref, name_len); | ||
377 | btrfs_set_inode_extref_index(path->nodes[0], extref, index); | ||
378 | btrfs_set_inode_extref_parent(path->nodes[0], extref, ref_objectid); | ||
379 | |||
380 | ptr = (unsigned long)&extref->name; | ||
381 | write_extent_buffer(path->nodes[0], name, ptr, name_len); | ||
382 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
383 | |||
134 | out: | 384 | out: |
135 | btrfs_free_path(path); | 385 | btrfs_free_path(path); |
136 | return ret; | 386 | return ret; |
@@ -191,6 +441,19 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
191 | 441 | ||
192 | out: | 442 | out: |
193 | btrfs_free_path(path); | 443 | btrfs_free_path(path); |
444 | |||
445 | if (ret == -EMLINK) { | ||
446 | struct btrfs_super_block *disk_super = root->fs_info->super_copy; | ||
447 | /* We ran out of space in the ref array. Need to | ||
448 | * add an extended ref. */ | ||
449 | if (btrfs_super_incompat_flags(disk_super) | ||
450 | & BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) | ||
451 | ret = btrfs_insert_inode_extref(trans, root, name, | ||
452 | name_len, | ||
453 | inode_objectid, | ||
454 | ref_objectid, index); | ||
455 | } | ||
456 | |||
194 | return ret; | 457 | return ret; |
195 | } | 458 | } |
196 | 459 | ||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a6ed6944e50c..85a1e5053fe6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -230,7 +230,6 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
230 | u64 inline_len = actual_end - start; | 230 | u64 inline_len = actual_end - start; |
231 | u64 aligned_end = (end + root->sectorsize - 1) & | 231 | u64 aligned_end = (end + root->sectorsize - 1) & |
232 | ~((u64)root->sectorsize - 1); | 232 | ~((u64)root->sectorsize - 1); |
233 | u64 hint_byte; | ||
234 | u64 data_len = inline_len; | 233 | u64 data_len = inline_len; |
235 | int ret; | 234 | int ret; |
236 | 235 | ||
@@ -247,8 +246,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
247 | return 1; | 246 | return 1; |
248 | } | 247 | } |
249 | 248 | ||
250 | ret = btrfs_drop_extents(trans, inode, start, aligned_end, | 249 | ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1); |
251 | &hint_byte, 1); | ||
252 | if (ret) | 250 | if (ret) |
253 | return ret; | 251 | return ret; |
254 | 252 | ||
@@ -664,7 +662,7 @@ retry: | |||
664 | async_extent->compressed_size, | 662 | async_extent->compressed_size, |
665 | async_extent->compressed_size, | 663 | async_extent->compressed_size, |
666 | 0, alloc_hint, &ins, 1); | 664 | 0, alloc_hint, &ins, 1); |
667 | if (ret) | 665 | if (ret && ret != -ENOSPC) |
668 | btrfs_abort_transaction(trans, root, ret); | 666 | btrfs_abort_transaction(trans, root, ret); |
669 | btrfs_end_transaction(trans, root); | 667 | btrfs_end_transaction(trans, root); |
670 | } | 668 | } |
@@ -1308,6 +1306,7 @@ out_check: | |||
1308 | em->block_start = disk_bytenr; | 1306 | em->block_start = disk_bytenr; |
1309 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 1307 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
1310 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 1308 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
1309 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
1311 | while (1) { | 1310 | while (1) { |
1312 | write_lock(&em_tree->lock); | 1311 | write_lock(&em_tree->lock); |
1313 | ret = add_extent_mapping(em_tree, em); | 1312 | ret = add_extent_mapping(em_tree, em); |
@@ -1364,11 +1363,7 @@ out_check: | |||
1364 | } | 1363 | } |
1365 | 1364 | ||
1366 | error: | 1365 | error: |
1367 | if (nolock) { | 1366 | err = btrfs_end_transaction(trans, root); |
1368 | err = btrfs_end_transaction_nolock(trans, root); | ||
1369 | } else { | ||
1370 | err = btrfs_end_transaction(trans, root); | ||
1371 | } | ||
1372 | if (!ret) | 1367 | if (!ret) |
1373 | ret = err; | 1368 | ret = err; |
1374 | 1369 | ||
@@ -1785,7 +1780,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1785 | struct btrfs_path *path; | 1780 | struct btrfs_path *path; |
1786 | struct extent_buffer *leaf; | 1781 | struct extent_buffer *leaf; |
1787 | struct btrfs_key ins; | 1782 | struct btrfs_key ins; |
1788 | u64 hint; | ||
1789 | int ret; | 1783 | int ret; |
1790 | 1784 | ||
1791 | path = btrfs_alloc_path(); | 1785 | path = btrfs_alloc_path(); |
@@ -1803,8 +1797,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1803 | * the caller is expected to unpin it and allow it to be merged | 1797 | * the caller is expected to unpin it and allow it to be merged |
1804 | * with the others. | 1798 | * with the others. |
1805 | */ | 1799 | */ |
1806 | ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes, | 1800 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
1807 | &hint, 0); | 1801 | file_pos + num_bytes, 0); |
1808 | if (ret) | 1802 | if (ret) |
1809 | goto out; | 1803 | goto out; |
1810 | 1804 | ||
@@ -1828,10 +1822,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1828 | btrfs_set_file_extent_encryption(leaf, fi, encryption); | 1822 | btrfs_set_file_extent_encryption(leaf, fi, encryption); |
1829 | btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); | 1823 | btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); |
1830 | 1824 | ||
1831 | btrfs_unlock_up_safe(path, 1); | ||
1832 | btrfs_set_lock_blocking(leaf); | ||
1833 | |||
1834 | btrfs_mark_buffer_dirty(leaf); | 1825 | btrfs_mark_buffer_dirty(leaf); |
1826 | btrfs_release_path(path); | ||
1835 | 1827 | ||
1836 | inode_add_bytes(inode, num_bytes); | 1828 | inode_add_bytes(inode, num_bytes); |
1837 | 1829 | ||
@@ -1929,11 +1921,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
1929 | ordered_extent->len, | 1921 | ordered_extent->len, |
1930 | compress_type, 0, 0, | 1922 | compress_type, 0, 0, |
1931 | BTRFS_FILE_EXTENT_REG); | 1923 | BTRFS_FILE_EXTENT_REG); |
1932 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
1933 | ordered_extent->file_offset, | ||
1934 | ordered_extent->len); | ||
1935 | } | 1924 | } |
1936 | 1925 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | |
1926 | ordered_extent->file_offset, ordered_extent->len, | ||
1927 | trans->transid); | ||
1937 | if (ret < 0) { | 1928 | if (ret < 0) { |
1938 | btrfs_abort_transaction(trans, root, ret); | 1929 | btrfs_abort_transaction(trans, root, ret); |
1939 | goto out_unlock; | 1930 | goto out_unlock; |
@@ -1949,6 +1940,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
1949 | btrfs_abort_transaction(trans, root, ret); | 1940 | btrfs_abort_transaction(trans, root, ret); |
1950 | goto out_unlock; | 1941 | goto out_unlock; |
1951 | } | 1942 | } |
1943 | } else { | ||
1944 | btrfs_set_inode_last_trans(trans, inode); | ||
1952 | } | 1945 | } |
1953 | ret = 0; | 1946 | ret = 0; |
1954 | out_unlock: | 1947 | out_unlock: |
@@ -1958,12 +1951,8 @@ out_unlock: | |||
1958 | out: | 1951 | out: |
1959 | if (root != root->fs_info->tree_root) | 1952 | if (root != root->fs_info->tree_root) |
1960 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | 1953 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); |
1961 | if (trans) { | 1954 | if (trans) |
1962 | if (nolock) | 1955 | btrfs_end_transaction(trans, root); |
1963 | btrfs_end_transaction_nolock(trans, root); | ||
1964 | else | ||
1965 | btrfs_end_transaction(trans, root); | ||
1966 | } | ||
1967 | 1956 | ||
1968 | if (ret) | 1957 | if (ret) |
1969 | clear_extent_uptodate(io_tree, ordered_extent->file_offset, | 1958 | clear_extent_uptodate(io_tree, ordered_extent->file_offset, |
@@ -2119,7 +2108,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
2119 | if (empty) | 2108 | if (empty) |
2120 | return; | 2109 | return; |
2121 | 2110 | ||
2122 | down_read(&root->fs_info->cleanup_work_sem); | ||
2123 | spin_lock(&fs_info->delayed_iput_lock); | 2111 | spin_lock(&fs_info->delayed_iput_lock); |
2124 | list_splice_init(&fs_info->delayed_iputs, &list); | 2112 | list_splice_init(&fs_info->delayed_iputs, &list); |
2125 | spin_unlock(&fs_info->delayed_iput_lock); | 2113 | spin_unlock(&fs_info->delayed_iput_lock); |
@@ -2130,7 +2118,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
2130 | iput(delayed->inode); | 2118 | iput(delayed->inode); |
2131 | kfree(delayed); | 2119 | kfree(delayed); |
2132 | } | 2120 | } |
2133 | up_read(&root->fs_info->cleanup_work_sem); | ||
2134 | } | 2121 | } |
2135 | 2122 | ||
2136 | enum btrfs_orphan_cleanup_state { | 2123 | enum btrfs_orphan_cleanup_state { |
@@ -2198,7 +2185,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2198 | int ret; | 2185 | int ret; |
2199 | 2186 | ||
2200 | if (!root->orphan_block_rsv) { | 2187 | if (!root->orphan_block_rsv) { |
2201 | block_rsv = btrfs_alloc_block_rsv(root); | 2188 | block_rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); |
2202 | if (!block_rsv) | 2189 | if (!block_rsv) |
2203 | return -ENOMEM; | 2190 | return -ENOMEM; |
2204 | } | 2191 | } |
@@ -2225,7 +2212,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2225 | insert = 1; | 2212 | insert = 1; |
2226 | #endif | 2213 | #endif |
2227 | insert = 1; | 2214 | insert = 1; |
2228 | atomic_dec(&root->orphan_inodes); | 2215 | atomic_inc(&root->orphan_inodes); |
2229 | } | 2216 | } |
2230 | 2217 | ||
2231 | if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, | 2218 | if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, |
@@ -2590,6 +2577,18 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2590 | 2577 | ||
2591 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); | 2578 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); |
2592 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); | 2579 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); |
2580 | BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item); | ||
2581 | |||
2582 | /* | ||
2583 | * If we were modified in the current generation and evicted from memory | ||
2584 | * and then re-read we need to do a full sync since we don't have any | ||
2585 | * idea about which extents were modified before we were evicted from | ||
2586 | * cache. | ||
2587 | */ | ||
2588 | if (BTRFS_I(inode)->last_trans == root->fs_info->generation) | ||
2589 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
2590 | &BTRFS_I(inode)->runtime_flags); | ||
2591 | |||
2593 | inode->i_version = btrfs_inode_sequence(leaf, inode_item); | 2592 | inode->i_version = btrfs_inode_sequence(leaf, inode_item); |
2594 | inode->i_generation = BTRFS_I(inode)->generation; | 2593 | inode->i_generation = BTRFS_I(inode)->generation; |
2595 | inode->i_rdev = 0; | 2594 | inode->i_rdev = 0; |
@@ -2894,7 +2893,6 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
2894 | struct btrfs_trans_handle *trans; | 2893 | struct btrfs_trans_handle *trans; |
2895 | struct btrfs_root *root = BTRFS_I(dir)->root; | 2894 | struct btrfs_root *root = BTRFS_I(dir)->root; |
2896 | struct btrfs_path *path; | 2895 | struct btrfs_path *path; |
2897 | struct btrfs_inode_ref *ref; | ||
2898 | struct btrfs_dir_item *di; | 2896 | struct btrfs_dir_item *di; |
2899 | struct inode *inode = dentry->d_inode; | 2897 | struct inode *inode = dentry->d_inode; |
2900 | u64 index; | 2898 | u64 index; |
@@ -3008,17 +3006,17 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
3008 | } | 3006 | } |
3009 | btrfs_release_path(path); | 3007 | btrfs_release_path(path); |
3010 | 3008 | ||
3011 | ref = btrfs_lookup_inode_ref(trans, root, path, | 3009 | ret = btrfs_get_inode_ref_index(trans, root, path, dentry->d_name.name, |
3012 | dentry->d_name.name, dentry->d_name.len, | 3010 | dentry->d_name.len, ino, dir_ino, 0, |
3013 | ino, dir_ino, 0); | 3011 | &index); |
3014 | if (IS_ERR(ref)) { | 3012 | if (ret) { |
3015 | err = PTR_ERR(ref); | 3013 | err = ret; |
3016 | goto out; | 3014 | goto out; |
3017 | } | 3015 | } |
3018 | BUG_ON(!ref); /* Logic error */ | 3016 | |
3019 | if (check_path_shared(root, path)) | 3017 | if (check_path_shared(root, path)) |
3020 | goto out; | 3018 | goto out; |
3021 | index = btrfs_inode_ref_index(path->nodes[0], ref); | 3019 | |
3022 | btrfs_release_path(path); | 3020 | btrfs_release_path(path); |
3023 | 3021 | ||
3024 | /* | 3022 | /* |
@@ -3061,7 +3059,7 @@ out: | |||
3061 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, | 3059 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, |
3062 | struct btrfs_root *root) | 3060 | struct btrfs_root *root) |
3063 | { | 3061 | { |
3064 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { | 3062 | if (trans->block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL) { |
3065 | btrfs_block_rsv_release(root, trans->block_rsv, | 3063 | btrfs_block_rsv_release(root, trans->block_rsv, |
3066 | trans->bytes_reserved); | 3064 | trans->bytes_reserved); |
3067 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 3065 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
@@ -3191,9 +3189,10 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
3191 | struct btrfs_trans_handle *trans; | 3189 | struct btrfs_trans_handle *trans; |
3192 | unsigned long nr = 0; | 3190 | unsigned long nr = 0; |
3193 | 3191 | ||
3194 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || | 3192 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
3195 | btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) | ||
3196 | return -ENOTEMPTY; | 3193 | return -ENOTEMPTY; |
3194 | if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) | ||
3195 | return -EPERM; | ||
3197 | 3196 | ||
3198 | trans = __unlink_start_trans(dir, dentry); | 3197 | trans = __unlink_start_trans(dir, dentry); |
3199 | if (IS_ERR(trans)) | 3198 | if (IS_ERR(trans)) |
@@ -3267,8 +3266,13 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
3267 | return -ENOMEM; | 3266 | return -ENOMEM; |
3268 | path->reada = -1; | 3267 | path->reada = -1; |
3269 | 3268 | ||
3269 | /* | ||
3270 | * We want to drop from the next block forward in case this new size is | ||
3271 | * not block aligned since we will be keeping the last block of the | ||
3272 | * extent just the way it is. | ||
3273 | */ | ||
3270 | if (root->ref_cows || root == root->fs_info->tree_root) | 3274 | if (root->ref_cows || root == root->fs_info->tree_root) |
3271 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); | 3275 | btrfs_drop_extent_cache(inode, (new_size + mask) & (~mask), (u64)-1, 0); |
3272 | 3276 | ||
3273 | /* | 3277 | /* |
3274 | * This function is also used to drop the items in the log tree before | 3278 | * This function is also used to drop the items in the log tree before |
@@ -3429,12 +3433,6 @@ delete: | |||
3429 | 3433 | ||
3430 | if (path->slots[0] == 0 || | 3434 | if (path->slots[0] == 0 || |
3431 | path->slots[0] != pending_del_slot) { | 3435 | path->slots[0] != pending_del_slot) { |
3432 | if (root->ref_cows && | ||
3433 | BTRFS_I(inode)->location.objectid != | ||
3434 | BTRFS_FREE_INO_OBJECTID) { | ||
3435 | err = -EAGAIN; | ||
3436 | goto out; | ||
3437 | } | ||
3438 | if (pending_del_nr) { | 3436 | if (pending_del_nr) { |
3439 | ret = btrfs_del_items(trans, root, path, | 3437 | ret = btrfs_del_items(trans, root, path, |
3440 | pending_del_slot, | 3438 | pending_del_slot, |
@@ -3465,12 +3463,20 @@ error: | |||
3465 | } | 3463 | } |
3466 | 3464 | ||
3467 | /* | 3465 | /* |
3468 | * taken from block_truncate_page, but does cow as it zeros out | 3466 | * btrfs_truncate_page - read, zero a chunk and write a page |
3469 | * any bytes left in the last page in the file. | 3467 | * @inode - inode that we're zeroing |
3468 | * @from - the offset to start zeroing | ||
3469 | * @len - the length to zero, 0 to zero the entire range respective to the | ||
3470 | * offset | ||
3471 | * @front - zero up to the offset instead of from the offset on | ||
3472 | * | ||
3473 | * This will find the page for the "from" offset and cow the page and zero the | ||
3474 | * part we want to zero. This is used with truncate and hole punching. | ||
3470 | */ | 3475 | */ |
3471 | static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | 3476 | int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, |
3477 | int front) | ||
3472 | { | 3478 | { |
3473 | struct inode *inode = mapping->host; | 3479 | struct address_space *mapping = inode->i_mapping; |
3474 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3480 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3475 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3481 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
3476 | struct btrfs_ordered_extent *ordered; | 3482 | struct btrfs_ordered_extent *ordered; |
@@ -3485,7 +3491,8 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3485 | u64 page_start; | 3491 | u64 page_start; |
3486 | u64 page_end; | 3492 | u64 page_end; |
3487 | 3493 | ||
3488 | if ((offset & (blocksize - 1)) == 0) | 3494 | if ((offset & (blocksize - 1)) == 0 && |
3495 | (!len || ((len & (blocksize - 1)) == 0))) | ||
3489 | goto out; | 3496 | goto out; |
3490 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 3497 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
3491 | if (ret) | 3498 | if (ret) |
@@ -3532,7 +3539,8 @@ again: | |||
3532 | } | 3539 | } |
3533 | 3540 | ||
3534 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | 3541 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, |
3535 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, | 3542 | EXTENT_DIRTY | EXTENT_DELALLOC | |
3543 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, | ||
3536 | 0, 0, &cached_state, GFP_NOFS); | 3544 | 0, 0, &cached_state, GFP_NOFS); |
3537 | 3545 | ||
3538 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, | 3546 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, |
@@ -3545,8 +3553,13 @@ again: | |||
3545 | 3553 | ||
3546 | ret = 0; | 3554 | ret = 0; |
3547 | if (offset != PAGE_CACHE_SIZE) { | 3555 | if (offset != PAGE_CACHE_SIZE) { |
3556 | if (!len) | ||
3557 | len = PAGE_CACHE_SIZE - offset; | ||
3548 | kaddr = kmap(page); | 3558 | kaddr = kmap(page); |
3549 | memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); | 3559 | if (front) |
3560 | memset(kaddr, 0, offset); | ||
3561 | else | ||
3562 | memset(kaddr + offset, 0, len); | ||
3550 | flush_dcache_page(page); | 3563 | flush_dcache_page(page); |
3551 | kunmap(page); | 3564 | kunmap(page); |
3552 | } | 3565 | } |
@@ -3577,6 +3590,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
3577 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3590 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
3578 | struct extent_map *em = NULL; | 3591 | struct extent_map *em = NULL; |
3579 | struct extent_state *cached_state = NULL; | 3592 | struct extent_state *cached_state = NULL; |
3593 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
3580 | u64 mask = root->sectorsize - 1; | 3594 | u64 mask = root->sectorsize - 1; |
3581 | u64 hole_start = (oldsize + mask) & ~mask; | 3595 | u64 hole_start = (oldsize + mask) & ~mask; |
3582 | u64 block_end = (size + mask) & ~mask; | 3596 | u64 block_end = (size + mask) & ~mask; |
@@ -3613,7 +3627,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
3613 | last_byte = min(extent_map_end(em), block_end); | 3627 | last_byte = min(extent_map_end(em), block_end); |
3614 | last_byte = (last_byte + mask) & ~mask; | 3628 | last_byte = (last_byte + mask) & ~mask; |
3615 | if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { | 3629 | if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { |
3616 | u64 hint_byte = 0; | 3630 | struct extent_map *hole_em; |
3617 | hole_size = last_byte - cur_offset; | 3631 | hole_size = last_byte - cur_offset; |
3618 | 3632 | ||
3619 | trans = btrfs_start_transaction(root, 3); | 3633 | trans = btrfs_start_transaction(root, 3); |
@@ -3622,9 +3636,9 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
3622 | break; | 3636 | break; |
3623 | } | 3637 | } |
3624 | 3638 | ||
3625 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3639 | err = btrfs_drop_extents(trans, root, inode, |
3626 | cur_offset + hole_size, | 3640 | cur_offset, |
3627 | &hint_byte, 1); | 3641 | cur_offset + hole_size, 1); |
3628 | if (err) { | 3642 | if (err) { |
3629 | btrfs_abort_transaction(trans, root, err); | 3643 | btrfs_abort_transaction(trans, root, err); |
3630 | btrfs_end_transaction(trans, root); | 3644 | btrfs_end_transaction(trans, root); |
@@ -3641,9 +3655,39 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
3641 | break; | 3655 | break; |
3642 | } | 3656 | } |
3643 | 3657 | ||
3644 | btrfs_drop_extent_cache(inode, hole_start, | 3658 | btrfs_drop_extent_cache(inode, cur_offset, |
3645 | last_byte - 1, 0); | 3659 | cur_offset + hole_size - 1, 0); |
3660 | hole_em = alloc_extent_map(); | ||
3661 | if (!hole_em) { | ||
3662 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
3663 | &BTRFS_I(inode)->runtime_flags); | ||
3664 | goto next; | ||
3665 | } | ||
3666 | hole_em->start = cur_offset; | ||
3667 | hole_em->len = hole_size; | ||
3668 | hole_em->orig_start = cur_offset; | ||
3646 | 3669 | ||
3670 | hole_em->block_start = EXTENT_MAP_HOLE; | ||
3671 | hole_em->block_len = 0; | ||
3672 | hole_em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
3673 | hole_em->compress_type = BTRFS_COMPRESS_NONE; | ||
3674 | hole_em->generation = trans->transid; | ||
3675 | |||
3676 | while (1) { | ||
3677 | write_lock(&em_tree->lock); | ||
3678 | err = add_extent_mapping(em_tree, hole_em); | ||
3679 | if (!err) | ||
3680 | list_move(&hole_em->list, | ||
3681 | &em_tree->modified_extents); | ||
3682 | write_unlock(&em_tree->lock); | ||
3683 | if (err != -EEXIST) | ||
3684 | break; | ||
3685 | btrfs_drop_extent_cache(inode, cur_offset, | ||
3686 | cur_offset + | ||
3687 | hole_size - 1, 0); | ||
3688 | } | ||
3689 | free_extent_map(hole_em); | ||
3690 | next: | ||
3647 | btrfs_update_inode(trans, root, inode); | 3691 | btrfs_update_inode(trans, root, inode); |
3648 | btrfs_end_transaction(trans, root); | 3692 | btrfs_end_transaction(trans, root); |
3649 | } | 3693 | } |
@@ -3768,26 +3812,22 @@ void btrfs_evict_inode(struct inode *inode) | |||
3768 | goto no_delete; | 3812 | goto no_delete; |
3769 | } | 3813 | } |
3770 | 3814 | ||
3771 | rsv = btrfs_alloc_block_rsv(root); | 3815 | rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); |
3772 | if (!rsv) { | 3816 | if (!rsv) { |
3773 | btrfs_orphan_del(NULL, inode); | 3817 | btrfs_orphan_del(NULL, inode); |
3774 | goto no_delete; | 3818 | goto no_delete; |
3775 | } | 3819 | } |
3776 | rsv->size = min_size; | 3820 | rsv->size = min_size; |
3821 | rsv->failfast = 1; | ||
3777 | global_rsv = &root->fs_info->global_block_rsv; | 3822 | global_rsv = &root->fs_info->global_block_rsv; |
3778 | 3823 | ||
3779 | btrfs_i_size_write(inode, 0); | 3824 | btrfs_i_size_write(inode, 0); |
3780 | 3825 | ||
3781 | /* | 3826 | /* |
3782 | * This is a bit simpler than btrfs_truncate since | 3827 | * This is a bit simpler than btrfs_truncate since we've already |
3783 | * | 3828 | * reserved our space for our orphan item in the unlink, so we just |
3784 | * 1) We've already reserved our space for our orphan item in the | 3829 | * need to reserve some slack space in case we add bytes and update |
3785 | * unlink. | 3830 | * inode item when doing the truncate. |
3786 | * 2) We're going to delete the inode item, so we don't need to update | ||
3787 | * it at all. | ||
3788 | * | ||
3789 | * So we just need to reserve some slack space in case we add bytes when | ||
3790 | * doing the truncate. | ||
3791 | */ | 3831 | */ |
3792 | while (1) { | 3832 | while (1) { |
3793 | ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size); | 3833 | ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size); |
@@ -3808,7 +3848,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
3808 | goto no_delete; | 3848 | goto no_delete; |
3809 | } | 3849 | } |
3810 | 3850 | ||
3811 | trans = btrfs_start_transaction(root, 0); | 3851 | trans = btrfs_start_transaction_noflush(root, 1); |
3812 | if (IS_ERR(trans)) { | 3852 | if (IS_ERR(trans)) { |
3813 | btrfs_orphan_del(NULL, inode); | 3853 | btrfs_orphan_del(NULL, inode); |
3814 | btrfs_free_block_rsv(root, rsv); | 3854 | btrfs_free_block_rsv(root, rsv); |
@@ -3818,9 +3858,13 @@ void btrfs_evict_inode(struct inode *inode) | |||
3818 | trans->block_rsv = rsv; | 3858 | trans->block_rsv = rsv; |
3819 | 3859 | ||
3820 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | 3860 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); |
3821 | if (ret != -EAGAIN) | 3861 | if (ret != -ENOSPC) |
3822 | break; | 3862 | break; |
3823 | 3863 | ||
3864 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
3865 | ret = btrfs_update_inode(trans, root, inode); | ||
3866 | BUG_ON(ret); | ||
3867 | |||
3824 | nr = trans->blocks_used; | 3868 | nr = trans->blocks_used; |
3825 | btrfs_end_transaction(trans, root); | 3869 | btrfs_end_transaction(trans, root); |
3826 | trans = NULL; | 3870 | trans = NULL; |
@@ -4470,10 +4514,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4470 | trans = btrfs_join_transaction(root); | 4514 | trans = btrfs_join_transaction(root); |
4471 | if (IS_ERR(trans)) | 4515 | if (IS_ERR(trans)) |
4472 | return PTR_ERR(trans); | 4516 | return PTR_ERR(trans); |
4473 | if (nolock) | 4517 | ret = btrfs_commit_transaction(trans, root); |
4474 | ret = btrfs_end_transaction_nolock(trans, root); | ||
4475 | else | ||
4476 | ret = btrfs_commit_transaction(trans, root); | ||
4477 | } | 4518 | } |
4478 | return ret; | 4519 | return ret; |
4479 | } | 4520 | } |
@@ -4671,6 +4712,14 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4671 | BTRFS_I(inode)->generation = trans->transid; | 4712 | BTRFS_I(inode)->generation = trans->transid; |
4672 | inode->i_generation = BTRFS_I(inode)->generation; | 4713 | inode->i_generation = BTRFS_I(inode)->generation; |
4673 | 4714 | ||
4715 | /* | ||
4716 | * We could have gotten an inode number from somebody who was fsynced | ||
4717 | * and then removed in this same transaction, so let's just set full | ||
4718 | * sync since it will be a full sync anyway and this will blow away the | ||
4719 | * old info in the log. | ||
4720 | */ | ||
4721 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); | ||
4722 | |||
4674 | if (S_ISDIR(mode)) | 4723 | if (S_ISDIR(mode)) |
4675 | owner = 0; | 4724 | owner = 0; |
4676 | else | 4725 | else |
@@ -4680,6 +4729,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4680 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); | 4729 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); |
4681 | key[0].offset = 0; | 4730 | key[0].offset = 0; |
4682 | 4731 | ||
4732 | /* | ||
4733 | * Start new inodes with an inode_ref. This is slightly more | ||
4734 | * efficient for small numbers of hard links since they will | ||
4735 | * be packed into one item. Extended refs will kick in if we | ||
4736 | * add more hard links than can fit in the ref item. | ||
4737 | */ | ||
4683 | key[1].objectid = objectid; | 4738 | key[1].objectid = objectid; |
4684 | btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); | 4739 | btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); |
4685 | key[1].offset = ref_objectid; | 4740 | key[1].offset = ref_objectid; |
@@ -4986,7 +5041,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4986 | if (root->objectid != BTRFS_I(inode)->root->objectid) | 5041 | if (root->objectid != BTRFS_I(inode)->root->objectid) |
4987 | return -EXDEV; | 5042 | return -EXDEV; |
4988 | 5043 | ||
4989 | if (inode->i_nlink == ~0U) | 5044 | if (inode->i_nlink >= BTRFS_LINK_MAX) |
4990 | return -EMLINK; | 5045 | return -EMLINK; |
4991 | 5046 | ||
4992 | err = btrfs_set_inode_index(dir, &index); | 5047 | err = btrfs_set_inode_index(dir, &index); |
@@ -5450,7 +5505,8 @@ insert: | |||
5450 | write_unlock(&em_tree->lock); | 5505 | write_unlock(&em_tree->lock); |
5451 | out: | 5506 | out: |
5452 | 5507 | ||
5453 | trace_btrfs_get_extent(root, em); | 5508 | if (em) |
5509 | trace_btrfs_get_extent(root, em); | ||
5454 | 5510 | ||
5455 | if (path) | 5511 | if (path) |
5456 | btrfs_free_path(path); | 5512 | btrfs_free_path(path); |
@@ -5836,6 +5892,48 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, | |||
5836 | return ret; | 5892 | return ret; |
5837 | } | 5893 | } |
5838 | 5894 | ||
5895 | static struct extent_map *create_pinned_em(struct inode *inode, u64 start, | ||
5896 | u64 len, u64 orig_start, | ||
5897 | u64 block_start, u64 block_len, | ||
5898 | int type) | ||
5899 | { | ||
5900 | struct extent_map_tree *em_tree; | ||
5901 | struct extent_map *em; | ||
5902 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5903 | int ret; | ||
5904 | |||
5905 | em_tree = &BTRFS_I(inode)->extent_tree; | ||
5906 | em = alloc_extent_map(); | ||
5907 | if (!em) | ||
5908 | return ERR_PTR(-ENOMEM); | ||
5909 | |||
5910 | em->start = start; | ||
5911 | em->orig_start = orig_start; | ||
5912 | em->len = len; | ||
5913 | em->block_len = block_len; | ||
5914 | em->block_start = block_start; | ||
5915 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
5916 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
5917 | if (type == BTRFS_ORDERED_PREALLOC) | ||
5918 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
5919 | |||
5920 | do { | ||
5921 | btrfs_drop_extent_cache(inode, em->start, | ||
5922 | em->start + em->len - 1, 0); | ||
5923 | write_lock(&em_tree->lock); | ||
5924 | ret = add_extent_mapping(em_tree, em); | ||
5925 | write_unlock(&em_tree->lock); | ||
5926 | } while (ret == -EEXIST); | ||
5927 | |||
5928 | if (ret) { | ||
5929 | free_extent_map(em); | ||
5930 | return ERR_PTR(ret); | ||
5931 | } | ||
5932 | |||
5933 | return em; | ||
5934 | } | ||
5935 | |||
5936 | |||
5839 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | 5937 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, |
5840 | struct buffer_head *bh_result, int create) | 5938 | struct buffer_head *bh_result, int create) |
5841 | { | 5939 | { |
@@ -5950,6 +6048,19 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
5950 | goto must_cow; | 6048 | goto must_cow; |
5951 | 6049 | ||
5952 | if (can_nocow_odirect(trans, inode, start, len) == 1) { | 6050 | if (can_nocow_odirect(trans, inode, start, len) == 1) { |
6051 | u64 orig_start = em->start; | ||
6052 | |||
6053 | if (type == BTRFS_ORDERED_PREALLOC) { | ||
6054 | free_extent_map(em); | ||
6055 | em = create_pinned_em(inode, start, len, | ||
6056 | orig_start, | ||
6057 | block_start, len, type); | ||
6058 | if (IS_ERR(em)) { | ||
6059 | btrfs_end_transaction(trans, root); | ||
6060 | goto unlock_err; | ||
6061 | } | ||
6062 | } | ||
6063 | |||
5953 | ret = btrfs_add_ordered_extent_dio(inode, start, | 6064 | ret = btrfs_add_ordered_extent_dio(inode, start, |
5954 | block_start, len, len, type); | 6065 | block_start, len, len, type); |
5955 | btrfs_end_transaction(trans, root); | 6066 | btrfs_end_transaction(trans, root); |
@@ -5999,7 +6110,8 @@ unlock: | |||
5999 | if (lockstart < lockend) { | 6110 | if (lockstart < lockend) { |
6000 | if (create && len < lockend - lockstart) { | 6111 | if (create && len < lockend - lockstart) { |
6001 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | 6112 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, |
6002 | lockstart + len - 1, unlock_bits, 1, 0, | 6113 | lockstart + len - 1, |
6114 | unlock_bits | EXTENT_DEFRAG, 1, 0, | ||
6003 | &cached_state, GFP_NOFS); | 6115 | &cached_state, GFP_NOFS); |
6004 | /* | 6116 | /* |
6005 | * Beside unlock, we also need to cleanup reserved space | 6117 | * Beside unlock, we also need to cleanup reserved space |
@@ -6007,8 +6119,8 @@ unlock: | |||
6007 | */ | 6119 | */ |
6008 | clear_extent_bit(&BTRFS_I(inode)->io_tree, | 6120 | clear_extent_bit(&BTRFS_I(inode)->io_tree, |
6009 | lockstart + len, lockend, | 6121 | lockstart + len, lockend, |
6010 | unlock_bits | EXTENT_DO_ACCOUNTING, | 6122 | unlock_bits | EXTENT_DO_ACCOUNTING | |
6011 | 1, 0, NULL, GFP_NOFS); | 6123 | EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS); |
6012 | } else { | 6124 | } else { |
6013 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | 6125 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, |
6014 | lockend, unlock_bits, 1, 0, | 6126 | lockend, unlock_bits, 1, 0, |
@@ -6573,8 +6685,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
6573 | */ | 6685 | */ |
6574 | clear_extent_bit(tree, page_start, page_end, | 6686 | clear_extent_bit(tree, page_start, page_end, |
6575 | EXTENT_DIRTY | EXTENT_DELALLOC | | 6687 | EXTENT_DIRTY | EXTENT_DELALLOC | |
6576 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, | 6688 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | |
6577 | &cached_state, GFP_NOFS); | 6689 | EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS); |
6578 | /* | 6690 | /* |
6579 | * whoever cleared the private bit is responsible | 6691 | * whoever cleared the private bit is responsible |
6580 | * for the finish_ordered_io | 6692 | * for the finish_ordered_io |
@@ -6590,7 +6702,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
6590 | } | 6702 | } |
6591 | clear_extent_bit(tree, page_start, page_end, | 6703 | clear_extent_bit(tree, page_start, page_end, |
6592 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | | 6704 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | |
6593 | EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS); | 6705 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1, |
6706 | &cached_state, GFP_NOFS); | ||
6594 | __btrfs_releasepage(page, GFP_NOFS); | 6707 | __btrfs_releasepage(page, GFP_NOFS); |
6595 | 6708 | ||
6596 | ClearPageChecked(page); | 6709 | ClearPageChecked(page); |
@@ -6687,7 +6800,8 @@ again: | |||
6687 | * prepare_pages in the normal write path. | 6800 | * prepare_pages in the normal write path. |
6688 | */ | 6801 | */ |
6689 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | 6802 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, |
6690 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, | 6803 | EXTENT_DIRTY | EXTENT_DELALLOC | |
6804 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, | ||
6691 | 0, 0, &cached_state, GFP_NOFS); | 6805 | 0, 0, &cached_state, GFP_NOFS); |
6692 | 6806 | ||
6693 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, | 6807 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, |
@@ -6718,6 +6832,7 @@ again: | |||
6718 | 6832 | ||
6719 | BTRFS_I(inode)->last_trans = root->fs_info->generation; | 6833 | BTRFS_I(inode)->last_trans = root->fs_info->generation; |
6720 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; | 6834 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; |
6835 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; | ||
6721 | 6836 | ||
6722 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); | 6837 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); |
6723 | 6838 | ||
@@ -6745,7 +6860,7 @@ static int btrfs_truncate(struct inode *inode) | |||
6745 | u64 mask = root->sectorsize - 1; | 6860 | u64 mask = root->sectorsize - 1; |
6746 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | 6861 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); |
6747 | 6862 | ||
6748 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); | 6863 | ret = btrfs_truncate_page(inode, inode->i_size, 0, 0); |
6749 | if (ret) | 6864 | if (ret) |
6750 | return ret; | 6865 | return ret; |
6751 | 6866 | ||
@@ -6788,10 +6903,11 @@ static int btrfs_truncate(struct inode *inode) | |||
6788 | * 3) fs_info->trans_block_rsv - this will have 1 items worth left for | 6903 | * 3) fs_info->trans_block_rsv - this will have 1 items worth left for |
6789 | * updating the inode. | 6904 | * updating the inode. |
6790 | */ | 6905 | */ |
6791 | rsv = btrfs_alloc_block_rsv(root); | 6906 | rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); |
6792 | if (!rsv) | 6907 | if (!rsv) |
6793 | return -ENOMEM; | 6908 | return -ENOMEM; |
6794 | rsv->size = min_size; | 6909 | rsv->size = min_size; |
6910 | rsv->failfast = 1; | ||
6795 | 6911 | ||
6796 | /* | 6912 | /* |
6797 | * 1 for the truncate slack space | 6913 | * 1 for the truncate slack space |
@@ -6837,36 +6953,21 @@ static int btrfs_truncate(struct inode *inode) | |||
6837 | &BTRFS_I(inode)->runtime_flags)) | 6953 | &BTRFS_I(inode)->runtime_flags)) |
6838 | btrfs_add_ordered_operation(trans, root, inode); | 6954 | btrfs_add_ordered_operation(trans, root, inode); |
6839 | 6955 | ||
6840 | while (1) { | 6956 | /* |
6841 | ret = btrfs_block_rsv_refill(root, rsv, min_size); | 6957 | * So if we truncate and then write and fsync we normally would just |
6842 | if (ret) { | 6958 | * write the extents that changed, which is a problem if we need to |
6843 | /* | 6959 | * first truncate that entire inode. So set this flag so we write out |
6844 | * This can only happen with the original transaction we | 6960 | * all of the extents in the inode to the sync log so we're completely |
6845 | * started above, every other time we shouldn't have a | 6961 | * safe. |
6846 | * transaction started yet. | 6962 | */ |
6847 | */ | 6963 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); |
6848 | if (ret == -EAGAIN) | 6964 | trans->block_rsv = rsv; |
6849 | goto end_trans; | ||
6850 | err = ret; | ||
6851 | break; | ||
6852 | } | ||
6853 | |||
6854 | if (!trans) { | ||
6855 | /* Just need the 1 for updating the inode */ | ||
6856 | trans = btrfs_start_transaction(root, 1); | ||
6857 | if (IS_ERR(trans)) { | ||
6858 | ret = err = PTR_ERR(trans); | ||
6859 | trans = NULL; | ||
6860 | break; | ||
6861 | } | ||
6862 | } | ||
6863 | |||
6864 | trans->block_rsv = rsv; | ||
6865 | 6965 | ||
6966 | while (1) { | ||
6866 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6967 | ret = btrfs_truncate_inode_items(trans, root, inode, |
6867 | inode->i_size, | 6968 | inode->i_size, |
6868 | BTRFS_EXTENT_DATA_KEY); | 6969 | BTRFS_EXTENT_DATA_KEY); |
6869 | if (ret != -EAGAIN) { | 6970 | if (ret != -ENOSPC) { |
6870 | err = ret; | 6971 | err = ret; |
6871 | break; | 6972 | break; |
6872 | } | 6973 | } |
@@ -6877,11 +6978,22 @@ static int btrfs_truncate(struct inode *inode) | |||
6877 | err = ret; | 6978 | err = ret; |
6878 | break; | 6979 | break; |
6879 | } | 6980 | } |
6880 | end_trans: | 6981 | |
6881 | nr = trans->blocks_used; | 6982 | nr = trans->blocks_used; |
6882 | btrfs_end_transaction(trans, root); | 6983 | btrfs_end_transaction(trans, root); |
6883 | trans = NULL; | ||
6884 | btrfs_btree_balance_dirty(root, nr); | 6984 | btrfs_btree_balance_dirty(root, nr); |
6985 | |||
6986 | trans = btrfs_start_transaction(root, 2); | ||
6987 | if (IS_ERR(trans)) { | ||
6988 | ret = err = PTR_ERR(trans); | ||
6989 | trans = NULL; | ||
6990 | break; | ||
6991 | } | ||
6992 | |||
6993 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, | ||
6994 | rsv, min_size); | ||
6995 | BUG_ON(ret); /* shouldn't happen */ | ||
6996 | trans->block_rsv = rsv; | ||
6885 | } | 6997 | } |
6886 | 6998 | ||
6887 | if (ret == 0 && inode->i_nlink > 0) { | 6999 | if (ret == 0 && inode->i_nlink > 0) { |
@@ -6965,6 +7077,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6965 | ei->csum_bytes = 0; | 7077 | ei->csum_bytes = 0; |
6966 | ei->index_cnt = (u64)-1; | 7078 | ei->index_cnt = (u64)-1; |
6967 | ei->last_unlink_trans = 0; | 7079 | ei->last_unlink_trans = 0; |
7080 | ei->last_log_commit = 0; | ||
6968 | 7081 | ||
6969 | spin_lock_init(&ei->lock); | 7082 | spin_lock_init(&ei->lock); |
6970 | ei->outstanding_extents = 0; | 7083 | ei->outstanding_extents = 0; |
@@ -7095,31 +7208,31 @@ void btrfs_destroy_cachep(void) | |||
7095 | 7208 | ||
7096 | int btrfs_init_cachep(void) | 7209 | int btrfs_init_cachep(void) |
7097 | { | 7210 | { |
7098 | btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", | 7211 | btrfs_inode_cachep = kmem_cache_create("btrfs_inode", |
7099 | sizeof(struct btrfs_inode), 0, | 7212 | sizeof(struct btrfs_inode), 0, |
7100 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once); | 7213 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once); |
7101 | if (!btrfs_inode_cachep) | 7214 | if (!btrfs_inode_cachep) |
7102 | goto fail; | 7215 | goto fail; |
7103 | 7216 | ||
7104 | btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", | 7217 | btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle", |
7105 | sizeof(struct btrfs_trans_handle), 0, | 7218 | sizeof(struct btrfs_trans_handle), 0, |
7106 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 7219 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
7107 | if (!btrfs_trans_handle_cachep) | 7220 | if (!btrfs_trans_handle_cachep) |
7108 | goto fail; | 7221 | goto fail; |
7109 | 7222 | ||
7110 | btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", | 7223 | btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction", |
7111 | sizeof(struct btrfs_transaction), 0, | 7224 | sizeof(struct btrfs_transaction), 0, |
7112 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 7225 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
7113 | if (!btrfs_transaction_cachep) | 7226 | if (!btrfs_transaction_cachep) |
7114 | goto fail; | 7227 | goto fail; |
7115 | 7228 | ||
7116 | btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", | 7229 | btrfs_path_cachep = kmem_cache_create("btrfs_path", |
7117 | sizeof(struct btrfs_path), 0, | 7230 | sizeof(struct btrfs_path), 0, |
7118 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 7231 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
7119 | if (!btrfs_path_cachep) | 7232 | if (!btrfs_path_cachep) |
7120 | goto fail; | 7233 | goto fail; |
7121 | 7234 | ||
7122 | btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache", | 7235 | btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space", |
7123 | sizeof(struct btrfs_free_space), 0, | 7236 | sizeof(struct btrfs_free_space), 0, |
7124 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 7237 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
7125 | if (!btrfs_free_space_cachep) | 7238 | if (!btrfs_free_space_cachep) |
@@ -7513,6 +7626,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
7513 | loff_t actual_len, u64 *alloc_hint, | 7626 | loff_t actual_len, u64 *alloc_hint, |
7514 | struct btrfs_trans_handle *trans) | 7627 | struct btrfs_trans_handle *trans) |
7515 | { | 7628 | { |
7629 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
7630 | struct extent_map *em; | ||
7516 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7631 | struct btrfs_root *root = BTRFS_I(inode)->root; |
7517 | struct btrfs_key ins; | 7632 | struct btrfs_key ins; |
7518 | u64 cur_offset = start; | 7633 | u64 cur_offset = start; |
@@ -7553,6 +7668,37 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
7553 | btrfs_drop_extent_cache(inode, cur_offset, | 7668 | btrfs_drop_extent_cache(inode, cur_offset, |
7554 | cur_offset + ins.offset -1, 0); | 7669 | cur_offset + ins.offset -1, 0); |
7555 | 7670 | ||
7671 | em = alloc_extent_map(); | ||
7672 | if (!em) { | ||
7673 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
7674 | &BTRFS_I(inode)->runtime_flags); | ||
7675 | goto next; | ||
7676 | } | ||
7677 | |||
7678 | em->start = cur_offset; | ||
7679 | em->orig_start = cur_offset; | ||
7680 | em->len = ins.offset; | ||
7681 | em->block_start = ins.objectid; | ||
7682 | em->block_len = ins.offset; | ||
7683 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
7684 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
7685 | em->generation = trans->transid; | ||
7686 | |||
7687 | while (1) { | ||
7688 | write_lock(&em_tree->lock); | ||
7689 | ret = add_extent_mapping(em_tree, em); | ||
7690 | if (!ret) | ||
7691 | list_move(&em->list, | ||
7692 | &em_tree->modified_extents); | ||
7693 | write_unlock(&em_tree->lock); | ||
7694 | if (ret != -EEXIST) | ||
7695 | break; | ||
7696 | btrfs_drop_extent_cache(inode, cur_offset, | ||
7697 | cur_offset + ins.offset - 1, | ||
7698 | 0); | ||
7699 | } | ||
7700 | free_extent_map(em); | ||
7701 | next: | ||
7556 | num_bytes -= ins.offset; | 7702 | num_bytes -= ins.offset; |
7557 | cur_offset += ins.offset; | 7703 | cur_offset += ins.offset; |
7558 | *alloc_hint = ins.objectid + ins.offset; | 7704 | *alloc_hint = ins.objectid + ins.offset; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 47127c1bd290..61168805f175 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -181,6 +181,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
181 | int ret; | 181 | int ret; |
182 | u64 ip_oldflags; | 182 | u64 ip_oldflags; |
183 | unsigned int i_oldflags; | 183 | unsigned int i_oldflags; |
184 | umode_t mode; | ||
184 | 185 | ||
185 | if (btrfs_root_readonly(root)) | 186 | if (btrfs_root_readonly(root)) |
186 | return -EROFS; | 187 | return -EROFS; |
@@ -203,6 +204,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
203 | 204 | ||
204 | ip_oldflags = ip->flags; | 205 | ip_oldflags = ip->flags; |
205 | i_oldflags = inode->i_flags; | 206 | i_oldflags = inode->i_flags; |
207 | mode = inode->i_mode; | ||
206 | 208 | ||
207 | flags = btrfs_mask_flags(inode->i_mode, flags); | 209 | flags = btrfs_mask_flags(inode->i_mode, flags); |
208 | oldflags = btrfs_flags_to_ioctl(ip->flags); | 210 | oldflags = btrfs_flags_to_ioctl(ip->flags); |
@@ -237,10 +239,31 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
237 | ip->flags |= BTRFS_INODE_DIRSYNC; | 239 | ip->flags |= BTRFS_INODE_DIRSYNC; |
238 | else | 240 | else |
239 | ip->flags &= ~BTRFS_INODE_DIRSYNC; | 241 | ip->flags &= ~BTRFS_INODE_DIRSYNC; |
240 | if (flags & FS_NOCOW_FL) | 242 | if (flags & FS_NOCOW_FL) { |
241 | ip->flags |= BTRFS_INODE_NODATACOW; | 243 | if (S_ISREG(mode)) { |
242 | else | 244 | /* |
243 | ip->flags &= ~BTRFS_INODE_NODATACOW; | 245 | * It's safe to turn csums off here, no extents exist. |
246 | * Otherwise we want the flag to reflect the real COW | ||
247 | * status of the file and will not set it. | ||
248 | */ | ||
249 | if (inode->i_size == 0) | ||
250 | ip->flags |= BTRFS_INODE_NODATACOW | ||
251 | | BTRFS_INODE_NODATASUM; | ||
252 | } else { | ||
253 | ip->flags |= BTRFS_INODE_NODATACOW; | ||
254 | } | ||
255 | } else { | ||
256 | /* | ||
257 | * Revert back under same assumptions as above | |
258 | */ | ||
259 | if (S_ISREG(mode)) { | ||
260 | if (inode->i_size == 0) | ||
261 | ip->flags &= ~(BTRFS_INODE_NODATACOW | ||
262 | | BTRFS_INODE_NODATASUM); | ||
263 | } else { | ||
264 | ip->flags &= ~BTRFS_INODE_NODATACOW; | ||
265 | } | ||
266 | } | ||
244 | 267 | ||
245 | /* | 268 | /* |
246 | * The COMPRESS flag can only be changed by users, while the NOCOMPRESS | 269 | * The COMPRESS flag can only be changed by users, while the NOCOMPRESS |
@@ -516,7 +539,8 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
516 | if (!pending_snapshot) | 539 | if (!pending_snapshot) |
517 | return -ENOMEM; | 540 | return -ENOMEM; |
518 | 541 | ||
519 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); | 542 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, |
543 | BTRFS_BLOCK_RSV_TEMP); | ||
520 | pending_snapshot->dentry = dentry; | 544 | pending_snapshot->dentry = dentry; |
521 | pending_snapshot->root = root; | 545 | pending_snapshot->root = root; |
522 | pending_snapshot->readonly = readonly; | 546 | pending_snapshot->readonly = readonly; |
@@ -525,7 +549,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
525 | *inherit = NULL; /* take responsibility to free it */ | 549 | *inherit = NULL; /* take responsibility to free it */ |
526 | } | 550 | } |
527 | 551 | ||
528 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | 552 | trans = btrfs_start_transaction(root->fs_info->extent_root, 6); |
529 | if (IS_ERR(trans)) { | 553 | if (IS_ERR(trans)) { |
530 | ret = PTR_ERR(trans); | 554 | ret = PTR_ERR(trans); |
531 | goto fail; | 555 | goto fail; |
@@ -614,7 +638,7 @@ static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir) | |||
614 | return -ENOENT; | 638 | return -ENOENT; |
615 | 639 | ||
616 | BUG_ON(victim->d_parent->d_inode != dir); | 640 | BUG_ON(victim->d_parent->d_inode != dir); |
617 | audit_inode_child(victim, dir); | 641 | audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); |
618 | 642 | ||
619 | error = inode_permission(dir, MAY_WRITE | MAY_EXEC); | 643 | error = inode_permission(dir, MAY_WRITE | MAY_EXEC); |
620 | if (error) | 644 | if (error) |
@@ -1022,8 +1046,8 @@ again: | |||
1022 | page_start, page_end - 1, 0, &cached_state); | 1046 | page_start, page_end - 1, 0, &cached_state); |
1023 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, | 1047 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, |
1024 | page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | | 1048 | page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | |
1025 | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, | 1049 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, |
1026 | GFP_NOFS); | 1050 | &cached_state, GFP_NOFS); |
1027 | 1051 | ||
1028 | if (i_done != page_cnt) { | 1052 | if (i_done != page_cnt) { |
1029 | spin_lock(&BTRFS_I(inode)->lock); | 1053 | spin_lock(&BTRFS_I(inode)->lock); |
@@ -1034,8 +1058,8 @@ again: | |||
1034 | } | 1058 | } |
1035 | 1059 | ||
1036 | 1060 | ||
1037 | btrfs_set_extent_delalloc(inode, page_start, page_end - 1, | 1061 | set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, |
1038 | &cached_state); | 1062 | &cached_state, GFP_NOFS); |
1039 | 1063 | ||
1040 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | 1064 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
1041 | page_start, page_end - 1, &cached_state, | 1065 | page_start, page_end - 1, &cached_state, |
@@ -2351,7 +2375,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2351 | int ret; | 2375 | int ret; |
2352 | u64 len = olen; | 2376 | u64 len = olen; |
2353 | u64 bs = root->fs_info->sb->s_blocksize; | 2377 | u64 bs = root->fs_info->sb->s_blocksize; |
2354 | u64 hint_byte; | ||
2355 | 2378 | ||
2356 | /* | 2379 | /* |
2357 | * TODO: | 2380 | * TODO: |
@@ -2456,13 +2479,13 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2456 | another, and lock file content */ | 2479 | another, and lock file content */ |
2457 | while (1) { | 2480 | while (1) { |
2458 | struct btrfs_ordered_extent *ordered; | 2481 | struct btrfs_ordered_extent *ordered; |
2459 | lock_extent(&BTRFS_I(src)->io_tree, off, off+len); | 2482 | lock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); |
2460 | ordered = btrfs_lookup_first_ordered_extent(src, off+len); | 2483 | ordered = btrfs_lookup_first_ordered_extent(src, off + len - 1); |
2461 | if (!ordered && | 2484 | if (!ordered && |
2462 | !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len, | 2485 | !test_range_bit(&BTRFS_I(src)->io_tree, off, off + len - 1, |
2463 | EXTENT_DELALLOC, 0, NULL)) | 2486 | EXTENT_DELALLOC, 0, NULL)) |
2464 | break; | 2487 | break; |
2465 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len); | 2488 | unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); |
2466 | if (ordered) | 2489 | if (ordered) |
2467 | btrfs_put_ordered_extent(ordered); | 2490 | btrfs_put_ordered_extent(ordered); |
2468 | btrfs_wait_ordered_range(src, off, len); | 2491 | btrfs_wait_ordered_range(src, off, len); |
@@ -2536,7 +2559,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2536 | btrfs_release_path(path); | 2559 | btrfs_release_path(path); |
2537 | 2560 | ||
2538 | if (key.offset + datal <= off || | 2561 | if (key.offset + datal <= off || |
2539 | key.offset >= off+len) | 2562 | key.offset >= off + len - 1) |
2540 | goto next; | 2563 | goto next; |
2541 | 2564 | ||
2542 | memcpy(&new_key, &key, sizeof(new_key)); | 2565 | memcpy(&new_key, &key, sizeof(new_key)); |
@@ -2574,10 +2597,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2574 | datal -= off - key.offset; | 2597 | datal -= off - key.offset; |
2575 | } | 2598 | } |
2576 | 2599 | ||
2577 | ret = btrfs_drop_extents(trans, inode, | 2600 | ret = btrfs_drop_extents(trans, root, inode, |
2578 | new_key.offset, | 2601 | new_key.offset, |
2579 | new_key.offset + datal, | 2602 | new_key.offset + datal, |
2580 | &hint_byte, 1); | 2603 | 1); |
2581 | if (ret) { | 2604 | if (ret) { |
2582 | btrfs_abort_transaction(trans, root, | 2605 | btrfs_abort_transaction(trans, root, |
2583 | ret); | 2606 | ret); |
@@ -2637,8 +2660,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2637 | new_key.offset += skip; | 2660 | new_key.offset += skip; |
2638 | } | 2661 | } |
2639 | 2662 | ||
2640 | if (key.offset + datal > off+len) | 2663 | if (key.offset + datal > off + len) |
2641 | trim = key.offset + datal - (off+len); | 2664 | trim = key.offset + datal - (off + len); |
2642 | 2665 | ||
2643 | if (comp && (skip || trim)) { | 2666 | if (comp && (skip || trim)) { |
2644 | ret = -EINVAL; | 2667 | ret = -EINVAL; |
@@ -2648,10 +2671,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2648 | size -= skip + trim; | 2671 | size -= skip + trim; |
2649 | datal -= skip + trim; | 2672 | datal -= skip + trim; |
2650 | 2673 | ||
2651 | ret = btrfs_drop_extents(trans, inode, | 2674 | ret = btrfs_drop_extents(trans, root, inode, |
2652 | new_key.offset, | 2675 | new_key.offset, |
2653 | new_key.offset + datal, | 2676 | new_key.offset + datal, |
2654 | &hint_byte, 1); | 2677 | 1); |
2655 | if (ret) { | 2678 | if (ret) { |
2656 | btrfs_abort_transaction(trans, root, | 2679 | btrfs_abort_transaction(trans, root, |
2657 | ret); | 2680 | ret); |
@@ -2715,7 +2738,7 @@ next: | |||
2715 | ret = 0; | 2738 | ret = 0; |
2716 | out: | 2739 | out: |
2717 | btrfs_release_path(path); | 2740 | btrfs_release_path(path); |
2718 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len); | 2741 | unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); |
2719 | out_unlock: | 2742 | out_unlock: |
2720 | mutex_unlock(&src->i_mutex); | 2743 | mutex_unlock(&src->i_mutex); |
2721 | mutex_unlock(&inode->i_mutex); | 2744 | mutex_unlock(&inode->i_mutex); |
@@ -2850,8 +2873,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
2850 | return 0; | 2873 | return 0; |
2851 | } | 2874 | } |
2852 | 2875 | ||
2853 | static void get_block_group_info(struct list_head *groups_list, | 2876 | void btrfs_get_block_group_info(struct list_head *groups_list, |
2854 | struct btrfs_ioctl_space_info *space) | 2877 | struct btrfs_ioctl_space_info *space) |
2855 | { | 2878 | { |
2856 | struct btrfs_block_group_cache *block_group; | 2879 | struct btrfs_block_group_cache *block_group; |
2857 | 2880 | ||
@@ -2959,8 +2982,8 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
2959 | down_read(&info->groups_sem); | 2982 | down_read(&info->groups_sem); |
2960 | for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { | 2983 | for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { |
2961 | if (!list_empty(&info->block_groups[c])) { | 2984 | if (!list_empty(&info->block_groups[c])) { |
2962 | get_block_group_info(&info->block_groups[c], | 2985 | btrfs_get_block_group_info( |
2963 | &space); | 2986 | &info->block_groups[c], &space); |
2964 | memcpy(dest, &space, sizeof(space)); | 2987 | memcpy(dest, &space, sizeof(space)); |
2965 | dest++; | 2988 | dest++; |
2966 | space_args.total_spaces++; | 2989 | space_args.total_spaces++; |
@@ -3208,11 +3231,9 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
3208 | { | 3231 | { |
3209 | int ret = 0; | 3232 | int ret = 0; |
3210 | int size; | 3233 | int size; |
3211 | u64 extent_item_pos; | ||
3212 | struct btrfs_ioctl_logical_ino_args *loi; | 3234 | struct btrfs_ioctl_logical_ino_args *loi; |
3213 | struct btrfs_data_container *inodes = NULL; | 3235 | struct btrfs_data_container *inodes = NULL; |
3214 | struct btrfs_path *path = NULL; | 3236 | struct btrfs_path *path = NULL; |
3215 | struct btrfs_key key; | ||
3216 | 3237 | ||
3217 | if (!capable(CAP_SYS_ADMIN)) | 3238 | if (!capable(CAP_SYS_ADMIN)) |
3218 | return -EPERM; | 3239 | return -EPERM; |
@@ -3230,7 +3251,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
3230 | goto out; | 3251 | goto out; |
3231 | } | 3252 | } |
3232 | 3253 | ||
3233 | size = min_t(u32, loi->size, 4096); | 3254 | size = min_t(u32, loi->size, 64 * 1024); |
3234 | inodes = init_data_container(size); | 3255 | inodes = init_data_container(size); |
3235 | if (IS_ERR(inodes)) { | 3256 | if (IS_ERR(inodes)) { |
3236 | ret = PTR_ERR(inodes); | 3257 | ret = PTR_ERR(inodes); |
@@ -3238,22 +3259,13 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
3238 | goto out; | 3259 | goto out; |
3239 | } | 3260 | } |
3240 | 3261 | ||
3241 | ret = extent_from_logical(root->fs_info, loi->logical, path, &key); | 3262 | ret = iterate_inodes_from_logical(loi->logical, root->fs_info, path, |
3242 | btrfs_release_path(path); | 3263 | build_ino_list, inodes); |
3243 | 3264 | if (ret == -EINVAL) | |
3244 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) | ||
3245 | ret = -ENOENT; | 3265 | ret = -ENOENT; |
3246 | if (ret < 0) | 3266 | if (ret < 0) |
3247 | goto out; | 3267 | goto out; |
3248 | 3268 | ||
3249 | extent_item_pos = loi->logical - key.objectid; | ||
3250 | ret = iterate_extent_inodes(root->fs_info, key.objectid, | ||
3251 | extent_item_pos, 0, build_ino_list, | ||
3252 | inodes); | ||
3253 | |||
3254 | if (ret < 0) | ||
3255 | goto out; | ||
3256 | |||
3257 | ret = copy_to_user((void *)(unsigned long)loi->inodes, | 3269 | ret = copy_to_user((void *)(unsigned long)loi->inodes, |
3258 | (void *)(unsigned long)inodes, size); | 3270 | (void *)(unsigned long)inodes, size); |
3259 | if (ret) | 3271 | if (ret) |
@@ -3261,7 +3273,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
3261 | 3273 | ||
3262 | out: | 3274 | out: |
3263 | btrfs_free_path(path); | 3275 | btrfs_free_path(path); |
3264 | kfree(inodes); | 3276 | vfree(inodes); |
3265 | kfree(loi); | 3277 | kfree(loi); |
3266 | 3278 | ||
3267 | return ret; | 3279 | return ret; |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 051c7fe551dd..7772f02ba28e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -25,6 +25,8 @@ | |||
25 | #include "btrfs_inode.h" | 25 | #include "btrfs_inode.h" |
26 | #include "extent_io.h" | 26 | #include "extent_io.h" |
27 | 27 | ||
28 | static struct kmem_cache *btrfs_ordered_extent_cache; | ||
29 | |||
28 | static u64 entry_end(struct btrfs_ordered_extent *entry) | 30 | static u64 entry_end(struct btrfs_ordered_extent *entry) |
29 | { | 31 | { |
30 | if (entry->file_offset + entry->len < entry->file_offset) | 32 | if (entry->file_offset + entry->len < entry->file_offset) |
@@ -187,7 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
187 | struct btrfs_ordered_extent *entry; | 189 | struct btrfs_ordered_extent *entry; |
188 | 190 | ||
189 | tree = &BTRFS_I(inode)->ordered_tree; | 191 | tree = &BTRFS_I(inode)->ordered_tree; |
190 | entry = kzalloc(sizeof(*entry), GFP_NOFS); | 192 | entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS); |
191 | if (!entry) | 193 | if (!entry) |
192 | return -ENOMEM; | 194 | return -ENOMEM; |
193 | 195 | ||
@@ -421,7 +423,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
421 | list_del(&sum->list); | 423 | list_del(&sum->list); |
422 | kfree(sum); | 424 | kfree(sum); |
423 | } | 425 | } |
424 | kfree(entry); | 426 | kmem_cache_free(btrfs_ordered_extent_cache, entry); |
425 | } | 427 | } |
426 | } | 428 | } |
427 | 429 | ||
@@ -466,8 +468,7 @@ void btrfs_remove_ordered_extent(struct inode *inode, | |||
466 | * wait for all the ordered extents in a root. This is done when balancing | 468 | * wait for all the ordered extents in a root. This is done when balancing |
467 | * space between drives. | 469 | * space between drives. |
468 | */ | 470 | */ |
469 | void btrfs_wait_ordered_extents(struct btrfs_root *root, | 471 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) |
470 | int nocow_only, int delay_iput) | ||
471 | { | 472 | { |
472 | struct list_head splice; | 473 | struct list_head splice; |
473 | struct list_head *cur; | 474 | struct list_head *cur; |
@@ -482,15 +483,6 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, | |||
482 | cur = splice.next; | 483 | cur = splice.next; |
483 | ordered = list_entry(cur, struct btrfs_ordered_extent, | 484 | ordered = list_entry(cur, struct btrfs_ordered_extent, |
484 | root_extent_list); | 485 | root_extent_list); |
485 | if (nocow_only && | ||
486 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) && | ||
487 | !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { | ||
488 | list_move(&ordered->root_extent_list, | ||
489 | &root->fs_info->ordered_extents); | ||
490 | cond_resched_lock(&root->fs_info->ordered_extent_lock); | ||
491 | continue; | ||
492 | } | ||
493 | |||
494 | list_del_init(&ordered->root_extent_list); | 486 | list_del_init(&ordered->root_extent_list); |
495 | atomic_inc(&ordered->refs); | 487 | atomic_inc(&ordered->refs); |
496 | 488 | ||
@@ -775,7 +767,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
775 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; | 767 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; |
776 | u64 disk_i_size; | 768 | u64 disk_i_size; |
777 | u64 new_i_size; | 769 | u64 new_i_size; |
778 | u64 i_size_test; | ||
779 | u64 i_size = i_size_read(inode); | 770 | u64 i_size = i_size_read(inode); |
780 | struct rb_node *node; | 771 | struct rb_node *node; |
781 | struct rb_node *prev = NULL; | 772 | struct rb_node *prev = NULL; |
@@ -835,55 +826,30 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
835 | break; | 826 | break; |
836 | if (test->file_offset >= i_size) | 827 | if (test->file_offset >= i_size) |
837 | break; | 828 | break; |
838 | if (test->file_offset >= disk_i_size) | 829 | if (test->file_offset >= disk_i_size) { |
830 | /* | ||
831 | * we don't update disk_i_size now, so record this | ||
832 | * undealt i_size. Or we will not know the real | ||
833 | * i_size. | ||
834 | */ | ||
835 | if (test->outstanding_isize < offset) | ||
836 | test->outstanding_isize = offset; | ||
837 | if (ordered && | ||
838 | ordered->outstanding_isize > | ||
839 | test->outstanding_isize) | ||
840 | test->outstanding_isize = | ||
841 | ordered->outstanding_isize; | ||
839 | goto out; | 842 | goto out; |
840 | } | ||
841 | new_i_size = min_t(u64, offset, i_size); | ||
842 | |||
843 | /* | ||
844 | * at this point, we know we can safely update i_size to at least | ||
845 | * the offset from this ordered extent. But, we need to | ||
846 | * walk forward and see if ios from higher up in the file have | ||
847 | * finished. | ||
848 | */ | ||
849 | if (ordered) { | ||
850 | node = rb_next(&ordered->rb_node); | ||
851 | } else { | ||
852 | if (prev) | ||
853 | node = rb_next(prev); | ||
854 | else | ||
855 | node = rb_first(&tree->tree); | ||
856 | } | ||
857 | |||
858 | /* | ||
859 | * We are looking for an area between our current extent and the next | ||
860 | * ordered extent to update the i_size to. There are 3 cases here | ||
861 | * | ||
862 | * 1) We don't actually have anything and we can update to i_size. | ||
863 | * 2) We have stuff but they already did their i_size update so again we | ||
864 | * can just update to i_size. | ||
865 | * 3) We have an outstanding ordered extent so the most we can update | ||
866 | * our disk_i_size to is the start of the next offset. | ||
867 | */ | ||
868 | i_size_test = i_size; | ||
869 | for (; node; node = rb_next(node)) { | ||
870 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
871 | |||
872 | if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) | ||
873 | continue; | ||
874 | if (test->file_offset > offset) { | ||
875 | i_size_test = test->file_offset; | ||
876 | break; | ||
877 | } | 843 | } |
878 | } | 844 | } |
845 | new_i_size = min_t(u64, offset, i_size); | ||
879 | 846 | ||
880 | /* | 847 | /* |
881 | * i_size_test is the end of a region after this ordered | 848 | * Some ordered extents may completed before the current one, and |
882 | * extent where there are no ordered extents, we can safely set | 849 | * we hold the real i_size in ->outstanding_isize. |
883 | * disk_i_size to this. | ||
884 | */ | 850 | */ |
885 | if (i_size_test > offset) | 851 | if (ordered && ordered->outstanding_isize > new_i_size) |
886 | new_i_size = min_t(u64, i_size_test, i_size); | 852 | new_i_size = min_t(u64, ordered->outstanding_isize, i_size); |
887 | BTRFS_I(inode)->disk_i_size = new_i_size; | 853 | BTRFS_I(inode)->disk_i_size = new_i_size; |
888 | ret = 0; | 854 | ret = 0; |
889 | out: | 855 | out: |
@@ -984,3 +950,20 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
984 | } | 950 | } |
985 | spin_unlock(&root->fs_info->ordered_extent_lock); | 951 | spin_unlock(&root->fs_info->ordered_extent_lock); |
986 | } | 952 | } |
953 | |||
954 | int __init ordered_data_init(void) | ||
955 | { | ||
956 | btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent", | ||
957 | sizeof(struct btrfs_ordered_extent), 0, | ||
958 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | ||
959 | NULL); | ||
960 | if (!btrfs_ordered_extent_cache) | ||
961 | return -ENOMEM; | ||
962 | return 0; | ||
963 | } | ||
964 | |||
965 | void ordered_data_exit(void) | ||
966 | { | ||
967 | if (btrfs_ordered_extent_cache) | ||
968 | kmem_cache_destroy(btrfs_ordered_extent_cache); | ||
969 | } | ||
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index e03c560d2997..dd27a0b46a37 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -96,6 +96,13 @@ struct btrfs_ordered_extent { | |||
96 | /* number of bytes that still need writing */ | 96 | /* number of bytes that still need writing */ |
97 | u64 bytes_left; | 97 | u64 bytes_left; |
98 | 98 | ||
99 | /* | ||
100 | * the end of the ordered extent which is behind it but | ||
101 | * didn't update disk_i_size. Please see the comment of | ||
102 | * btrfs_ordered_update_i_size(); | ||
103 | */ | ||
104 | u64 outstanding_isize; | ||
105 | |||
99 | /* flags (described above) */ | 106 | /* flags (described above) */ |
100 | unsigned long flags; | 107 | unsigned long flags; |
101 | 108 | ||
@@ -183,6 +190,7 @@ void btrfs_run_ordered_operations(struct btrfs_root *root, int wait); | |||
183 | void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | 190 | void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, |
184 | struct btrfs_root *root, | 191 | struct btrfs_root *root, |
185 | struct inode *inode); | 192 | struct inode *inode); |
186 | void btrfs_wait_ordered_extents(struct btrfs_root *root, | 193 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput); |
187 | int nocow_only, int delay_iput); | 194 | int __init ordered_data_init(void); |
195 | void ordered_data_exit(void); | ||
188 | #endif | 196 | #endif |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b65015581744..5039686df6ae 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -1145,12 +1145,12 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
1145 | 1145 | ||
1146 | ulist_reinit(tmp); | 1146 | ulist_reinit(tmp); |
1147 | /* XXX id not needed */ | 1147 | /* XXX id not needed */ |
1148 | ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC); | 1148 | ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC); |
1149 | ULIST_ITER_INIT(&tmp_uiter); | 1149 | ULIST_ITER_INIT(&tmp_uiter); |
1150 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { | 1150 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { |
1151 | struct btrfs_qgroup_list *glist; | 1151 | struct btrfs_qgroup_list *glist; |
1152 | 1152 | ||
1153 | qg = (struct btrfs_qgroup *)tmp_unode->aux; | 1153 | qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; |
1154 | if (qg->refcnt < seq) | 1154 | if (qg->refcnt < seq) |
1155 | qg->refcnt = seq + 1; | 1155 | qg->refcnt = seq + 1; |
1156 | else | 1156 | else |
@@ -1158,7 +1158,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
1158 | 1158 | ||
1159 | list_for_each_entry(glist, &qg->groups, next_group) { | 1159 | list_for_each_entry(glist, &qg->groups, next_group) { |
1160 | ulist_add(tmp, glist->group->qgroupid, | 1160 | ulist_add(tmp, glist->group->qgroupid, |
1161 | (unsigned long)glist->group, | 1161 | (u64)(uintptr_t)glist->group, |
1162 | GFP_ATOMIC); | 1162 | GFP_ATOMIC); |
1163 | } | 1163 | } |
1164 | } | 1164 | } |
@@ -1168,13 +1168,13 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
1168 | * step 2: walk from the new root | 1168 | * step 2: walk from the new root |
1169 | */ | 1169 | */ |
1170 | ulist_reinit(tmp); | 1170 | ulist_reinit(tmp); |
1171 | ulist_add(tmp, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); | 1171 | ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); |
1172 | ULIST_ITER_INIT(&uiter); | 1172 | ULIST_ITER_INIT(&uiter); |
1173 | while ((unode = ulist_next(tmp, &uiter))) { | 1173 | while ((unode = ulist_next(tmp, &uiter))) { |
1174 | struct btrfs_qgroup *qg; | 1174 | struct btrfs_qgroup *qg; |
1175 | struct btrfs_qgroup_list *glist; | 1175 | struct btrfs_qgroup_list *glist; |
1176 | 1176 | ||
1177 | qg = (struct btrfs_qgroup *)unode->aux; | 1177 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; |
1178 | if (qg->refcnt < seq) { | 1178 | if (qg->refcnt < seq) { |
1179 | /* not visited by step 1 */ | 1179 | /* not visited by step 1 */ |
1180 | qg->rfer += sgn * node->num_bytes; | 1180 | qg->rfer += sgn * node->num_bytes; |
@@ -1190,7 +1190,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
1190 | 1190 | ||
1191 | list_for_each_entry(glist, &qg->groups, next_group) { | 1191 | list_for_each_entry(glist, &qg->groups, next_group) { |
1192 | ulist_add(tmp, glist->group->qgroupid, | 1192 | ulist_add(tmp, glist->group->qgroupid, |
1193 | (unsigned long)glist->group, GFP_ATOMIC); | 1193 | (uintptr_t)glist->group, GFP_ATOMIC); |
1194 | } | 1194 | } |
1195 | } | 1195 | } |
1196 | 1196 | ||
@@ -1208,12 +1208,12 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
1208 | continue; | 1208 | continue; |
1209 | 1209 | ||
1210 | ulist_reinit(tmp); | 1210 | ulist_reinit(tmp); |
1211 | ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC); | 1211 | ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC); |
1212 | ULIST_ITER_INIT(&tmp_uiter); | 1212 | ULIST_ITER_INIT(&tmp_uiter); |
1213 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { | 1213 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { |
1214 | struct btrfs_qgroup_list *glist; | 1214 | struct btrfs_qgroup_list *glist; |
1215 | 1215 | ||
1216 | qg = (struct btrfs_qgroup *)tmp_unode->aux; | 1216 | qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; |
1217 | if (qg->tag == seq) | 1217 | if (qg->tag == seq) |
1218 | continue; | 1218 | continue; |
1219 | 1219 | ||
@@ -1225,7 +1225,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
1225 | 1225 | ||
1226 | list_for_each_entry(glist, &qg->groups, next_group) { | 1226 | list_for_each_entry(glist, &qg->groups, next_group) { |
1227 | ulist_add(tmp, glist->group->qgroupid, | 1227 | ulist_add(tmp, glist->group->qgroupid, |
1228 | (unsigned long)glist->group, | 1228 | (uintptr_t)glist->group, |
1229 | GFP_ATOMIC); | 1229 | GFP_ATOMIC); |
1230 | } | 1230 | } |
1231 | } | 1231 | } |
@@ -1469,13 +1469,17 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
1469 | * be exceeded | 1469 | * be exceeded |
1470 | */ | 1470 | */ |
1471 | ulist = ulist_alloc(GFP_ATOMIC); | 1471 | ulist = ulist_alloc(GFP_ATOMIC); |
1472 | ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); | 1472 | if (!ulist) { |
1473 | ret = -ENOMEM; | ||
1474 | goto out; | ||
1475 | } | ||
1476 | ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); | ||
1473 | ULIST_ITER_INIT(&uiter); | 1477 | ULIST_ITER_INIT(&uiter); |
1474 | while ((unode = ulist_next(ulist, &uiter))) { | 1478 | while ((unode = ulist_next(ulist, &uiter))) { |
1475 | struct btrfs_qgroup *qg; | 1479 | struct btrfs_qgroup *qg; |
1476 | struct btrfs_qgroup_list *glist; | 1480 | struct btrfs_qgroup_list *glist; |
1477 | 1481 | ||
1478 | qg = (struct btrfs_qgroup *)unode->aux; | 1482 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; |
1479 | 1483 | ||
1480 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && | 1484 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && |
1481 | qg->reserved + qg->rfer + num_bytes > | 1485 | qg->reserved + qg->rfer + num_bytes > |
@@ -1489,7 +1493,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
1489 | 1493 | ||
1490 | list_for_each_entry(glist, &qg->groups, next_group) { | 1494 | list_for_each_entry(glist, &qg->groups, next_group) { |
1491 | ulist_add(ulist, glist->group->qgroupid, | 1495 | ulist_add(ulist, glist->group->qgroupid, |
1492 | (unsigned long)glist->group, GFP_ATOMIC); | 1496 | (uintptr_t)glist->group, GFP_ATOMIC); |
1493 | } | 1497 | } |
1494 | } | 1498 | } |
1495 | if (ret) | 1499 | if (ret) |
@@ -1502,7 +1506,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
1502 | while ((unode = ulist_next(ulist, &uiter))) { | 1506 | while ((unode = ulist_next(ulist, &uiter))) { |
1503 | struct btrfs_qgroup *qg; | 1507 | struct btrfs_qgroup *qg; |
1504 | 1508 | ||
1505 | qg = (struct btrfs_qgroup *)unode->aux; | 1509 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; |
1506 | 1510 | ||
1507 | qg->reserved += num_bytes; | 1511 | qg->reserved += num_bytes; |
1508 | } | 1512 | } |
@@ -1541,19 +1545,23 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | |||
1541 | goto out; | 1545 | goto out; |
1542 | 1546 | ||
1543 | ulist = ulist_alloc(GFP_ATOMIC); | 1547 | ulist = ulist_alloc(GFP_ATOMIC); |
1544 | ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); | 1548 | if (!ulist) { |
1549 | btrfs_std_error(fs_info, -ENOMEM); | ||
1550 | goto out; | ||
1551 | } | ||
1552 | ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); | ||
1545 | ULIST_ITER_INIT(&uiter); | 1553 | ULIST_ITER_INIT(&uiter); |
1546 | while ((unode = ulist_next(ulist, &uiter))) { | 1554 | while ((unode = ulist_next(ulist, &uiter))) { |
1547 | struct btrfs_qgroup *qg; | 1555 | struct btrfs_qgroup *qg; |
1548 | struct btrfs_qgroup_list *glist; | 1556 | struct btrfs_qgroup_list *glist; |
1549 | 1557 | ||
1550 | qg = (struct btrfs_qgroup *)unode->aux; | 1558 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; |
1551 | 1559 | ||
1552 | qg->reserved -= num_bytes; | 1560 | qg->reserved -= num_bytes; |
1553 | 1561 | ||
1554 | list_for_each_entry(glist, &qg->groups, next_group) { | 1562 | list_for_each_entry(glist, &qg->groups, next_group) { |
1555 | ulist_add(ulist, glist->group->qgroupid, | 1563 | ulist_add(ulist, glist->group->qgroupid, |
1556 | (unsigned long)glist->group, GFP_ATOMIC); | 1564 | (uintptr_t)glist->group, GFP_ATOMIC); |
1557 | } | 1565 | } |
1558 | } | 1566 | } |
1559 | 1567 | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4da08652004d..776f0aa128fc 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -3270,8 +3270,8 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, | |||
3270 | key.offset = 0; | 3270 | key.offset = 0; |
3271 | 3271 | ||
3272 | inode = btrfs_iget(fs_info->sb, &key, root, NULL); | 3272 | inode = btrfs_iget(fs_info->sb, &key, root, NULL); |
3273 | if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) { | 3273 | if (IS_ERR(inode) || is_bad_inode(inode)) { |
3274 | if (inode && !IS_ERR(inode)) | 3274 | if (!IS_ERR(inode)) |
3275 | iput(inode); | 3275 | iput(inode); |
3276 | return -ENOENT; | 3276 | return -ENOENT; |
3277 | } | 3277 | } |
@@ -3621,7 +3621,7 @@ next: | |||
3621 | 3621 | ||
3622 | ret = find_first_extent_bit(&rc->processed_blocks, | 3622 | ret = find_first_extent_bit(&rc->processed_blocks, |
3623 | key.objectid, &start, &end, | 3623 | key.objectid, &start, &end, |
3624 | EXTENT_DIRTY); | 3624 | EXTENT_DIRTY, NULL); |
3625 | 3625 | ||
3626 | if (ret == 0 && start <= key.objectid) { | 3626 | if (ret == 0 && start <= key.objectid) { |
3627 | btrfs_release_path(path); | 3627 | btrfs_release_path(path); |
@@ -3674,7 +3674,8 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
3674 | struct btrfs_trans_handle *trans; | 3674 | struct btrfs_trans_handle *trans; |
3675 | int ret; | 3675 | int ret; |
3676 | 3676 | ||
3677 | rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root); | 3677 | rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root, |
3678 | BTRFS_BLOCK_RSV_TEMP); | ||
3678 | if (!rc->block_rsv) | 3679 | if (!rc->block_rsv) |
3679 | return -ENOMEM; | 3680 | return -ENOMEM; |
3680 | 3681 | ||
@@ -4057,7 +4058,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
4057 | (unsigned long long)rc->block_group->flags); | 4058 | (unsigned long long)rc->block_group->flags); |
4058 | 4059 | ||
4059 | btrfs_start_delalloc_inodes(fs_info->tree_root, 0); | 4060 | btrfs_start_delalloc_inodes(fs_info->tree_root, 0); |
4060 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); | 4061 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); |
4061 | 4062 | ||
4062 | while (1) { | 4063 | while (1) { |
4063 | mutex_lock(&fs_info->cleaner_mutex); | 4064 | mutex_lock(&fs_info->cleaner_mutex); |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 10d8e4d88071..eb923d087da7 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -141,8 +141,10 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
141 | return -ENOMEM; | 141 | return -ENOMEM; |
142 | 142 | ||
143 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | 143 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); |
144 | if (ret < 0) | 144 | if (ret < 0) { |
145 | goto out_abort; | 145 | btrfs_abort_transaction(trans, root, ret); |
146 | goto out; | ||
147 | } | ||
146 | 148 | ||
147 | if (ret != 0) { | 149 | if (ret != 0) { |
148 | btrfs_print_leaf(root, path->nodes[0]); | 150 | btrfs_print_leaf(root, path->nodes[0]); |
@@ -166,16 +168,23 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
166 | btrfs_release_path(path); | 168 | btrfs_release_path(path); |
167 | ret = btrfs_search_slot(trans, root, key, path, | 169 | ret = btrfs_search_slot(trans, root, key, path, |
168 | -1, 1); | 170 | -1, 1); |
169 | if (ret < 0) | 171 | if (ret < 0) { |
170 | goto out_abort; | 172 | btrfs_abort_transaction(trans, root, ret); |
173 | goto out; | ||
174 | } | ||
175 | |||
171 | ret = btrfs_del_item(trans, root, path); | 176 | ret = btrfs_del_item(trans, root, path); |
172 | if (ret < 0) | 177 | if (ret < 0) { |
173 | goto out_abort; | 178 | btrfs_abort_transaction(trans, root, ret); |
179 | goto out; | ||
180 | } | ||
174 | btrfs_release_path(path); | 181 | btrfs_release_path(path); |
175 | ret = btrfs_insert_empty_item(trans, root, path, | 182 | ret = btrfs_insert_empty_item(trans, root, path, |
176 | key, sizeof(*item)); | 183 | key, sizeof(*item)); |
177 | if (ret < 0) | 184 | if (ret < 0) { |
178 | goto out_abort; | 185 | btrfs_abort_transaction(trans, root, ret); |
186 | goto out; | ||
187 | } | ||
179 | l = path->nodes[0]; | 188 | l = path->nodes[0]; |
180 | slot = path->slots[0]; | 189 | slot = path->slots[0]; |
181 | ptr = btrfs_item_ptr_offset(l, slot); | 190 | ptr = btrfs_item_ptr_offset(l, slot); |
@@ -192,10 +201,6 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
192 | out: | 201 | out: |
193 | btrfs_free_path(path); | 202 | btrfs_free_path(path); |
194 | return ret; | 203 | return ret; |
195 | |||
196 | out_abort: | ||
197 | btrfs_abort_transaction(trans, root, ret); | ||
198 | goto out; | ||
199 | } | 204 | } |
200 | 205 | ||
201 | int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 206 | int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b223620cd5a6..27892f67e69b 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -352,13 +352,14 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
352 | struct extent_buffer *eb; | 352 | struct extent_buffer *eb; |
353 | struct btrfs_extent_item *ei; | 353 | struct btrfs_extent_item *ei; |
354 | struct scrub_warning swarn; | 354 | struct scrub_warning swarn; |
355 | u32 item_size; | 355 | unsigned long ptr = 0; |
356 | int ret; | 356 | u64 extent_item_pos; |
357 | u64 flags = 0; | ||
357 | u64 ref_root; | 358 | u64 ref_root; |
359 | u32 item_size; | ||
358 | u8 ref_level; | 360 | u8 ref_level; |
359 | unsigned long ptr = 0; | ||
360 | const int bufsize = 4096; | 361 | const int bufsize = 4096; |
361 | u64 extent_item_pos; | 362 | int ret; |
362 | 363 | ||
363 | path = btrfs_alloc_path(); | 364 | path = btrfs_alloc_path(); |
364 | 365 | ||
@@ -375,7 +376,8 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
375 | if (!path || !swarn.scratch_buf || !swarn.msg_buf) | 376 | if (!path || !swarn.scratch_buf || !swarn.msg_buf) |
376 | goto out; | 377 | goto out; |
377 | 378 | ||
378 | ret = extent_from_logical(fs_info, swarn.logical, path, &found_key); | 379 | ret = extent_from_logical(fs_info, swarn.logical, path, &found_key, |
380 | &flags); | ||
379 | if (ret < 0) | 381 | if (ret < 0) |
380 | goto out; | 382 | goto out; |
381 | 383 | ||
@@ -387,7 +389,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
387 | item_size = btrfs_item_size_nr(eb, path->slots[0]); | 389 | item_size = btrfs_item_size_nr(eb, path->slots[0]); |
388 | btrfs_release_path(path); | 390 | btrfs_release_path(path); |
389 | 391 | ||
390 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 392 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
391 | do { | 393 | do { |
392 | ret = tree_backref_for_extent(&ptr, eb, ei, item_size, | 394 | ret = tree_backref_for_extent(&ptr, eb, ei, item_size, |
393 | &ref_root, &ref_level); | 395 | &ref_root, &ref_level); |
@@ -1029,6 +1031,7 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, | |||
1029 | spin_lock(&sdev->stat_lock); | 1031 | spin_lock(&sdev->stat_lock); |
1030 | sdev->stat.malloc_errors++; | 1032 | sdev->stat.malloc_errors++; |
1031 | spin_unlock(&sdev->stat_lock); | 1033 | spin_unlock(&sdev->stat_lock); |
1034 | kfree(bbio); | ||
1032 | return -ENOMEM; | 1035 | return -ENOMEM; |
1033 | } | 1036 | } |
1034 | sblock->page_count++; | 1037 | sblock->page_count++; |
@@ -1666,21 +1669,6 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work) | |||
1666 | scrub_block_put(sblock); | 1669 | scrub_block_put(sblock); |
1667 | } | 1670 | } |
1668 | 1671 | ||
1669 | if (sbio->err) { | ||
1670 | /* what is this good for??? */ | ||
1671 | sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1); | ||
1672 | sbio->bio->bi_flags |= 1 << BIO_UPTODATE; | ||
1673 | sbio->bio->bi_phys_segments = 0; | ||
1674 | sbio->bio->bi_idx = 0; | ||
1675 | |||
1676 | for (i = 0; i < sbio->page_count; i++) { | ||
1677 | struct bio_vec *bi; | ||
1678 | bi = &sbio->bio->bi_io_vec[i]; | ||
1679 | bi->bv_offset = 0; | ||
1680 | bi->bv_len = PAGE_SIZE; | ||
1681 | } | ||
1682 | } | ||
1683 | |||
1684 | bio_put(sbio->bio); | 1672 | bio_put(sbio->bio); |
1685 | sbio->bio = NULL; | 1673 | sbio->bio = NULL; |
1686 | spin_lock(&sdev->list_lock); | 1674 | spin_lock(&sdev->list_lock); |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fb5ffe95f869..c7beb543a4a8 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -107,7 +107,6 @@ struct send_ctx { | |||
107 | int cur_inode_new; | 107 | int cur_inode_new; |
108 | int cur_inode_new_gen; | 108 | int cur_inode_new_gen; |
109 | int cur_inode_deleted; | 109 | int cur_inode_deleted; |
110 | int cur_inode_first_ref_orphan; | ||
111 | u64 cur_inode_size; | 110 | u64 cur_inode_size; |
112 | u64 cur_inode_mode; | 111 | u64 cur_inode_mode; |
113 | 112 | ||
@@ -126,7 +125,15 @@ struct send_ctx { | |||
126 | 125 | ||
127 | struct name_cache_entry { | 126 | struct name_cache_entry { |
128 | struct list_head list; | 127 | struct list_head list; |
129 | struct list_head use_list; | 128 | /* |
129 | * radix_tree has only 32bit entries but we need to handle 64bit inums. | ||
130 | * We use the lower 32bit of the 64bit inum to store it in the tree. If | ||
131 | * more then one inum would fall into the same entry, we use radix_list | ||
132 | * to store the additional entries. radix_list is also used to store | ||
133 | * entries where two entries have the same inum but different | ||
134 | * generations. | ||
135 | */ | ||
136 | struct list_head radix_list; | ||
130 | u64 ino; | 137 | u64 ino; |
131 | u64 gen; | 138 | u64 gen; |
132 | u64 parent_ino; | 139 | u64 parent_ino; |
@@ -328,6 +335,7 @@ out: | |||
328 | return ret; | 335 | return ret; |
329 | } | 336 | } |
330 | 337 | ||
338 | #if 0 | ||
331 | static void fs_path_remove(struct fs_path *p) | 339 | static void fs_path_remove(struct fs_path *p) |
332 | { | 340 | { |
333 | BUG_ON(p->reversed); | 341 | BUG_ON(p->reversed); |
@@ -335,6 +343,7 @@ static void fs_path_remove(struct fs_path *p) | |||
335 | p->end--; | 343 | p->end--; |
336 | *p->end = 0; | 344 | *p->end = 0; |
337 | } | 345 | } |
346 | #endif | ||
338 | 347 | ||
339 | static int fs_path_copy(struct fs_path *p, struct fs_path *from) | 348 | static int fs_path_copy(struct fs_path *p, struct fs_path *from) |
340 | { | 349 | { |
@@ -377,7 +386,7 @@ static struct btrfs_path *alloc_path_for_send(void) | |||
377 | return path; | 386 | return path; |
378 | } | 387 | } |
379 | 388 | ||
380 | static int write_buf(struct send_ctx *sctx, const void *buf, u32 len) | 389 | int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off) |
381 | { | 390 | { |
382 | int ret; | 391 | int ret; |
383 | mm_segment_t old_fs; | 392 | mm_segment_t old_fs; |
@@ -387,8 +396,7 @@ static int write_buf(struct send_ctx *sctx, const void *buf, u32 len) | |||
387 | set_fs(KERNEL_DS); | 396 | set_fs(KERNEL_DS); |
388 | 397 | ||
389 | while (pos < len) { | 398 | while (pos < len) { |
390 | ret = vfs_write(sctx->send_filp, (char *)buf + pos, len - pos, | 399 | ret = vfs_write(filp, (char *)buf + pos, len - pos, off); |
391 | &sctx->send_off); | ||
392 | /* TODO handle that correctly */ | 400 | /* TODO handle that correctly */ |
393 | /*if (ret == -ERESTARTSYS) { | 401 | /*if (ret == -ERESTARTSYS) { |
394 | continue; | 402 | continue; |
@@ -544,7 +552,8 @@ static int send_header(struct send_ctx *sctx) | |||
544 | strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC); | 552 | strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC); |
545 | hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION); | 553 | hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION); |
546 | 554 | ||
547 | return write_buf(sctx, &hdr, sizeof(hdr)); | 555 | return write_buf(sctx->send_filp, &hdr, sizeof(hdr), |
556 | &sctx->send_off); | ||
548 | } | 557 | } |
549 | 558 | ||
550 | /* | 559 | /* |
@@ -581,7 +590,8 @@ static int send_cmd(struct send_ctx *sctx) | |||
581 | crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); | 590 | crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); |
582 | hdr->crc = cpu_to_le32(crc); | 591 | hdr->crc = cpu_to_le32(crc); |
583 | 592 | ||
584 | ret = write_buf(sctx, sctx->send_buf, sctx->send_size); | 593 | ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size, |
594 | &sctx->send_off); | ||
585 | 595 | ||
586 | sctx->total_send_size += sctx->send_size; | 596 | sctx->total_send_size += sctx->send_size; |
587 | sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size; | 597 | sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size; |
@@ -687,7 +697,8 @@ out: | |||
687 | */ | 697 | */ |
688 | static int get_inode_info(struct btrfs_root *root, | 698 | static int get_inode_info(struct btrfs_root *root, |
689 | u64 ino, u64 *size, u64 *gen, | 699 | u64 ino, u64 *size, u64 *gen, |
690 | u64 *mode, u64 *uid, u64 *gid) | 700 | u64 *mode, u64 *uid, u64 *gid, |
701 | u64 *rdev) | ||
691 | { | 702 | { |
692 | int ret; | 703 | int ret; |
693 | struct btrfs_inode_item *ii; | 704 | struct btrfs_inode_item *ii; |
@@ -721,6 +732,8 @@ static int get_inode_info(struct btrfs_root *root, | |||
721 | *uid = btrfs_inode_uid(path->nodes[0], ii); | 732 | *uid = btrfs_inode_uid(path->nodes[0], ii); |
722 | if (gid) | 733 | if (gid) |
723 | *gid = btrfs_inode_gid(path->nodes[0], ii); | 734 | *gid = btrfs_inode_gid(path->nodes[0], ii); |
735 | if (rdev) | ||
736 | *rdev = btrfs_inode_rdev(path->nodes[0], ii); | ||
724 | 737 | ||
725 | out: | 738 | out: |
726 | btrfs_free_path(path); | 739 | btrfs_free_path(path); |
@@ -852,7 +865,6 @@ static int iterate_dir_item(struct send_ctx *sctx, | |||
852 | struct extent_buffer *eb; | 865 | struct extent_buffer *eb; |
853 | struct btrfs_item *item; | 866 | struct btrfs_item *item; |
854 | struct btrfs_dir_item *di; | 867 | struct btrfs_dir_item *di; |
855 | struct btrfs_path *tmp_path = NULL; | ||
856 | struct btrfs_key di_key; | 868 | struct btrfs_key di_key; |
857 | char *buf = NULL; | 869 | char *buf = NULL; |
858 | char *buf2 = NULL; | 870 | char *buf2 = NULL; |
@@ -874,12 +886,6 @@ static int iterate_dir_item(struct send_ctx *sctx, | |||
874 | goto out; | 886 | goto out; |
875 | } | 887 | } |
876 | 888 | ||
877 | tmp_path = alloc_path_for_send(); | ||
878 | if (!tmp_path) { | ||
879 | ret = -ENOMEM; | ||
880 | goto out; | ||
881 | } | ||
882 | |||
883 | eb = path->nodes[0]; | 889 | eb = path->nodes[0]; |
884 | slot = path->slots[0]; | 890 | slot = path->slots[0]; |
885 | item = btrfs_item_nr(eb, slot); | 891 | item = btrfs_item_nr(eb, slot); |
@@ -941,7 +947,6 @@ static int iterate_dir_item(struct send_ctx *sctx, | |||
941 | } | 947 | } |
942 | 948 | ||
943 | out: | 949 | out: |
944 | btrfs_free_path(tmp_path); | ||
945 | if (buf_virtual) | 950 | if (buf_virtual) |
946 | vfree(buf); | 951 | vfree(buf); |
947 | else | 952 | else |
@@ -1026,12 +1031,12 @@ struct backref_ctx { | |||
1026 | u64 extent_len; | 1031 | u64 extent_len; |
1027 | 1032 | ||
1028 | /* Just to check for bugs in backref resolving */ | 1033 | /* Just to check for bugs in backref resolving */ |
1029 | int found_in_send_root; | 1034 | int found_itself; |
1030 | }; | 1035 | }; |
1031 | 1036 | ||
1032 | static int __clone_root_cmp_bsearch(const void *key, const void *elt) | 1037 | static int __clone_root_cmp_bsearch(const void *key, const void *elt) |
1033 | { | 1038 | { |
1034 | u64 root = (u64)key; | 1039 | u64 root = (u64)(uintptr_t)key; |
1035 | struct clone_root *cr = (struct clone_root *)elt; | 1040 | struct clone_root *cr = (struct clone_root *)elt; |
1036 | 1041 | ||
1037 | if (root < cr->root->objectid) | 1042 | if (root < cr->root->objectid) |
@@ -1055,6 +1060,7 @@ static int __clone_root_cmp_sort(const void *e1, const void *e2) | |||
1055 | 1060 | ||
1056 | /* | 1061 | /* |
1057 | * Called for every backref that is found for the current extent. | 1062 | * Called for every backref that is found for the current extent. |
1063 | * Results are collected in sctx->clone_roots->ino/offset/found_refs | ||
1058 | */ | 1064 | */ |
1059 | static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | 1065 | static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) |
1060 | { | 1066 | { |
@@ -1064,7 +1070,7 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
1064 | u64 i_size; | 1070 | u64 i_size; |
1065 | 1071 | ||
1066 | /* First check if the root is in the list of accepted clone sources */ | 1072 | /* First check if the root is in the list of accepted clone sources */ |
1067 | found = bsearch((void *)root, bctx->sctx->clone_roots, | 1073 | found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots, |
1068 | bctx->sctx->clone_roots_cnt, | 1074 | bctx->sctx->clone_roots_cnt, |
1069 | sizeof(struct clone_root), | 1075 | sizeof(struct clone_root), |
1070 | __clone_root_cmp_bsearch); | 1076 | __clone_root_cmp_bsearch); |
@@ -1074,14 +1080,15 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
1074 | if (found->root == bctx->sctx->send_root && | 1080 | if (found->root == bctx->sctx->send_root && |
1075 | ino == bctx->cur_objectid && | 1081 | ino == bctx->cur_objectid && |
1076 | offset == bctx->cur_offset) { | 1082 | offset == bctx->cur_offset) { |
1077 | bctx->found_in_send_root = 1; | 1083 | bctx->found_itself = 1; |
1078 | } | 1084 | } |
1079 | 1085 | ||
1080 | /* | 1086 | /* |
1081 | * There are inodes that have extents that lie behind it's i_size. Don't | 1087 | * There are inodes that have extents that lie behind its i_size. Don't |
1082 | * accept clones from these extents. | 1088 | * accept clones from these extents. |
1083 | */ | 1089 | */ |
1084 | ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL); | 1090 | ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL, |
1091 | NULL); | ||
1085 | if (ret < 0) | 1092 | if (ret < 0) |
1086 | return ret; | 1093 | return ret; |
1087 | 1094 | ||
@@ -1101,16 +1108,12 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
1101 | */ | 1108 | */ |
1102 | if (ino >= bctx->cur_objectid) | 1109 | if (ino >= bctx->cur_objectid) |
1103 | return 0; | 1110 | return 0; |
1104 | /*if (ino > ctx->cur_objectid) | 1111 | #if 0 |
1112 | if (ino > bctx->cur_objectid) | ||
1105 | return 0; | 1113 | return 0; |
1106 | if (offset + ctx->extent_len > ctx->cur_offset) | 1114 | if (offset + bctx->extent_len > bctx->cur_offset) |
1107 | return 0;*/ | 1115 | return 0; |
1108 | 1116 | #endif | |
1109 | bctx->found++; | ||
1110 | found->found_refs++; | ||
1111 | found->ino = ino; | ||
1112 | found->offset = offset; | ||
1113 | return 0; | ||
1114 | } | 1117 | } |
1115 | 1118 | ||
1116 | bctx->found++; | 1119 | bctx->found++; |
@@ -1130,6 +1133,12 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
1130 | } | 1133 | } |
1131 | 1134 | ||
1132 | /* | 1135 | /* |
1136 | * Given an inode, offset and extent item, it finds a good clone for a clone | ||
1137 | * instruction. Returns -ENOENT when none could be found. The function makes | ||
1138 | * sure that the returned clone is usable at the point where sending is at the | ||
1139 | * moment. This means, that no clones are accepted which lie behind the current | ||
1140 | * inode+offset. | ||
1141 | * | ||
1133 | * path must point to the extent item when called. | 1142 | * path must point to the extent item when called. |
1134 | */ | 1143 | */ |
1135 | static int find_extent_clone(struct send_ctx *sctx, | 1144 | static int find_extent_clone(struct send_ctx *sctx, |
@@ -1141,20 +1150,29 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
1141 | int ret; | 1150 | int ret; |
1142 | int extent_type; | 1151 | int extent_type; |
1143 | u64 logical; | 1152 | u64 logical; |
1153 | u64 disk_byte; | ||
1144 | u64 num_bytes; | 1154 | u64 num_bytes; |
1145 | u64 extent_item_pos; | 1155 | u64 extent_item_pos; |
1156 | u64 flags = 0; | ||
1146 | struct btrfs_file_extent_item *fi; | 1157 | struct btrfs_file_extent_item *fi; |
1147 | struct extent_buffer *eb = path->nodes[0]; | 1158 | struct extent_buffer *eb = path->nodes[0]; |
1148 | struct backref_ctx backref_ctx; | 1159 | struct backref_ctx *backref_ctx = NULL; |
1149 | struct clone_root *cur_clone_root; | 1160 | struct clone_root *cur_clone_root; |
1150 | struct btrfs_key found_key; | 1161 | struct btrfs_key found_key; |
1151 | struct btrfs_path *tmp_path; | 1162 | struct btrfs_path *tmp_path; |
1163 | int compressed; | ||
1152 | u32 i; | 1164 | u32 i; |
1153 | 1165 | ||
1154 | tmp_path = alloc_path_for_send(); | 1166 | tmp_path = alloc_path_for_send(); |
1155 | if (!tmp_path) | 1167 | if (!tmp_path) |
1156 | return -ENOMEM; | 1168 | return -ENOMEM; |
1157 | 1169 | ||
1170 | backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS); | ||
1171 | if (!backref_ctx) { | ||
1172 | ret = -ENOMEM; | ||
1173 | goto out; | ||
1174 | } | ||
1175 | |||
1158 | if (data_offset >= ino_size) { | 1176 | if (data_offset >= ino_size) { |
1159 | /* | 1177 | /* |
1160 | * There may be extents that lie behind the file's size. | 1178 | * There may be extents that lie behind the file's size. |
@@ -1172,22 +1190,23 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
1172 | ret = -ENOENT; | 1190 | ret = -ENOENT; |
1173 | goto out; | 1191 | goto out; |
1174 | } | 1192 | } |
1193 | compressed = btrfs_file_extent_compression(eb, fi); | ||
1175 | 1194 | ||
1176 | num_bytes = btrfs_file_extent_num_bytes(eb, fi); | 1195 | num_bytes = btrfs_file_extent_num_bytes(eb, fi); |
1177 | logical = btrfs_file_extent_disk_bytenr(eb, fi); | 1196 | disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); |
1178 | if (logical == 0) { | 1197 | if (disk_byte == 0) { |
1179 | ret = -ENOENT; | 1198 | ret = -ENOENT; |
1180 | goto out; | 1199 | goto out; |
1181 | } | 1200 | } |
1182 | logical += btrfs_file_extent_offset(eb, fi); | 1201 | logical = disk_byte + btrfs_file_extent_offset(eb, fi); |
1183 | 1202 | ||
1184 | ret = extent_from_logical(sctx->send_root->fs_info, | 1203 | ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path, |
1185 | logical, tmp_path, &found_key); | 1204 | &found_key, &flags); |
1186 | btrfs_release_path(tmp_path); | 1205 | btrfs_release_path(tmp_path); |
1187 | 1206 | ||
1188 | if (ret < 0) | 1207 | if (ret < 0) |
1189 | goto out; | 1208 | goto out; |
1190 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 1209 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
1191 | ret = -EIO; | 1210 | ret = -EIO; |
1192 | goto out; | 1211 | goto out; |
1193 | } | 1212 | } |
@@ -1202,12 +1221,12 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
1202 | cur_clone_root->found_refs = 0; | 1221 | cur_clone_root->found_refs = 0; |
1203 | } | 1222 | } |
1204 | 1223 | ||
1205 | backref_ctx.sctx = sctx; | 1224 | backref_ctx->sctx = sctx; |
1206 | backref_ctx.found = 0; | 1225 | backref_ctx->found = 0; |
1207 | backref_ctx.cur_objectid = ino; | 1226 | backref_ctx->cur_objectid = ino; |
1208 | backref_ctx.cur_offset = data_offset; | 1227 | backref_ctx->cur_offset = data_offset; |
1209 | backref_ctx.found_in_send_root = 0; | 1228 | backref_ctx->found_itself = 0; |
1210 | backref_ctx.extent_len = num_bytes; | 1229 | backref_ctx->extent_len = num_bytes; |
1211 | 1230 | ||
1212 | /* | 1231 | /* |
1213 | * The last extent of a file may be too large due to page alignment. | 1232 | * The last extent of a file may be too large due to page alignment. |
@@ -1215,25 +1234,31 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
1215 | * __iterate_backrefs work. | 1234 | * __iterate_backrefs work. |
1216 | */ | 1235 | */ |
1217 | if (data_offset + num_bytes >= ino_size) | 1236 | if (data_offset + num_bytes >= ino_size) |
1218 | backref_ctx.extent_len = ino_size - data_offset; | 1237 | backref_ctx->extent_len = ino_size - data_offset; |
1219 | 1238 | ||
1220 | /* | 1239 | /* |
1221 | * Now collect all backrefs. | 1240 | * Now collect all backrefs. |
1222 | */ | 1241 | */ |
1242 | if (compressed == BTRFS_COMPRESS_NONE) | ||
1243 | extent_item_pos = logical - found_key.objectid; | ||
1244 | else | ||
1245 | extent_item_pos = 0; | ||
1246 | |||
1223 | extent_item_pos = logical - found_key.objectid; | 1247 | extent_item_pos = logical - found_key.objectid; |
1224 | ret = iterate_extent_inodes(sctx->send_root->fs_info, | 1248 | ret = iterate_extent_inodes(sctx->send_root->fs_info, |
1225 | found_key.objectid, extent_item_pos, 1, | 1249 | found_key.objectid, extent_item_pos, 1, |
1226 | __iterate_backrefs, &backref_ctx); | 1250 | __iterate_backrefs, backref_ctx); |
1251 | |||
1227 | if (ret < 0) | 1252 | if (ret < 0) |
1228 | goto out; | 1253 | goto out; |
1229 | 1254 | ||
1230 | if (!backref_ctx.found_in_send_root) { | 1255 | if (!backref_ctx->found_itself) { |
1231 | /* found a bug in backref code? */ | 1256 | /* found a bug in backref code? */ |
1232 | ret = -EIO; | 1257 | ret = -EIO; |
1233 | printk(KERN_ERR "btrfs: ERROR did not find backref in " | 1258 | printk(KERN_ERR "btrfs: ERROR did not find backref in " |
1234 | "send_root. inode=%llu, offset=%llu, " | 1259 | "send_root. inode=%llu, offset=%llu, " |
1235 | "logical=%llu\n", | 1260 | "disk_byte=%llu found extent=%llu\n", |
1236 | ino, data_offset, logical); | 1261 | ino, data_offset, disk_byte, found_key.objectid); |
1237 | goto out; | 1262 | goto out; |
1238 | } | 1263 | } |
1239 | 1264 | ||
@@ -1242,7 +1267,7 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " | |||
1242 | "num_bytes=%llu, logical=%llu\n", | 1267 | "num_bytes=%llu, logical=%llu\n", |
1243 | data_offset, ino, num_bytes, logical); | 1268 | data_offset, ino, num_bytes, logical); |
1244 | 1269 | ||
1245 | if (!backref_ctx.found) | 1270 | if (!backref_ctx->found) |
1246 | verbose_printk("btrfs: no clones found\n"); | 1271 | verbose_printk("btrfs: no clones found\n"); |
1247 | 1272 | ||
1248 | cur_clone_root = NULL; | 1273 | cur_clone_root = NULL; |
@@ -1253,7 +1278,6 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " | |||
1253 | else if (sctx->clone_roots[i].root == sctx->send_root) | 1278 | else if (sctx->clone_roots[i].root == sctx->send_root) |
1254 | /* prefer clones from send_root over others */ | 1279 | /* prefer clones from send_root over others */ |
1255 | cur_clone_root = sctx->clone_roots + i; | 1280 | cur_clone_root = sctx->clone_roots + i; |
1256 | break; | ||
1257 | } | 1281 | } |
1258 | 1282 | ||
1259 | } | 1283 | } |
@@ -1267,6 +1291,7 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " | |||
1267 | 1291 | ||
1268 | out: | 1292 | out: |
1269 | btrfs_free_path(tmp_path); | 1293 | btrfs_free_path(tmp_path); |
1294 | kfree(backref_ctx); | ||
1270 | return ret; | 1295 | return ret; |
1271 | } | 1296 | } |
1272 | 1297 | ||
@@ -1307,8 +1332,6 @@ static int read_symlink(struct send_ctx *sctx, | |||
1307 | len = btrfs_file_extent_inline_len(path->nodes[0], ei); | 1332 | len = btrfs_file_extent_inline_len(path->nodes[0], ei); |
1308 | 1333 | ||
1309 | ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); | 1334 | ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); |
1310 | if (ret < 0) | ||
1311 | goto out; | ||
1312 | 1335 | ||
1313 | out: | 1336 | out: |
1314 | btrfs_free_path(path); | 1337 | btrfs_free_path(path); |
@@ -1404,7 +1427,7 @@ static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen) | |||
1404 | u64 right_gen; | 1427 | u64 right_gen; |
1405 | 1428 | ||
1406 | ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL, | 1429 | ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL, |
1407 | NULL); | 1430 | NULL, NULL); |
1408 | if (ret < 0 && ret != -ENOENT) | 1431 | if (ret < 0 && ret != -ENOENT) |
1409 | goto out; | 1432 | goto out; |
1410 | left_ret = ret; | 1433 | left_ret = ret; |
@@ -1413,16 +1436,16 @@ static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen) | |||
1413 | right_ret = -ENOENT; | 1436 | right_ret = -ENOENT; |
1414 | } else { | 1437 | } else { |
1415 | ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen, | 1438 | ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen, |
1416 | NULL, NULL, NULL); | 1439 | NULL, NULL, NULL, NULL); |
1417 | if (ret < 0 && ret != -ENOENT) | 1440 | if (ret < 0 && ret != -ENOENT) |
1418 | goto out; | 1441 | goto out; |
1419 | right_ret = ret; | 1442 | right_ret = ret; |
1420 | } | 1443 | } |
1421 | 1444 | ||
1422 | if (!left_ret && !right_ret) { | 1445 | if (!left_ret && !right_ret) { |
1423 | if (left_gen == gen && right_gen == gen) | 1446 | if (left_gen == gen && right_gen == gen) { |
1424 | ret = inode_state_no_change; | 1447 | ret = inode_state_no_change; |
1425 | else if (left_gen == gen) { | 1448 | } else if (left_gen == gen) { |
1426 | if (ino < sctx->send_progress) | 1449 | if (ino < sctx->send_progress) |
1427 | ret = inode_state_did_create; | 1450 | ret = inode_state_did_create; |
1428 | else | 1451 | else |
@@ -1516,6 +1539,10 @@ out: | |||
1516 | return ret; | 1539 | return ret; |
1517 | } | 1540 | } |
1518 | 1541 | ||
1542 | /* | ||
1543 | * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir, | ||
1544 | * generation of the parent dir and the name of the dir entry. | ||
1545 | */ | ||
1519 | static int get_first_ref(struct send_ctx *sctx, | 1546 | static int get_first_ref(struct send_ctx *sctx, |
1520 | struct btrfs_root *root, u64 ino, | 1547 | struct btrfs_root *root, u64 ino, |
1521 | u64 *dir, u64 *dir_gen, struct fs_path *name) | 1548 | u64 *dir, u64 *dir_gen, struct fs_path *name) |
@@ -1557,7 +1584,7 @@ static int get_first_ref(struct send_ctx *sctx, | |||
1557 | btrfs_release_path(path); | 1584 | btrfs_release_path(path); |
1558 | 1585 | ||
1559 | ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL, | 1586 | ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL, |
1560 | NULL); | 1587 | NULL, NULL); |
1561 | if (ret < 0) | 1588 | if (ret < 0) |
1562 | goto out; | 1589 | goto out; |
1563 | 1590 | ||
@@ -1586,22 +1613,28 @@ static int is_first_ref(struct send_ctx *sctx, | |||
1586 | if (ret < 0) | 1613 | if (ret < 0) |
1587 | goto out; | 1614 | goto out; |
1588 | 1615 | ||
1589 | if (name_len != fs_path_len(tmp_name)) { | 1616 | if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) { |
1590 | ret = 0; | 1617 | ret = 0; |
1591 | goto out; | 1618 | goto out; |
1592 | } | 1619 | } |
1593 | 1620 | ||
1594 | ret = memcmp(tmp_name->start, name, name_len); | 1621 | ret = !memcmp(tmp_name->start, name, name_len); |
1595 | if (ret) | ||
1596 | ret = 0; | ||
1597 | else | ||
1598 | ret = 1; | ||
1599 | 1622 | ||
1600 | out: | 1623 | out: |
1601 | fs_path_free(sctx, tmp_name); | 1624 | fs_path_free(sctx, tmp_name); |
1602 | return ret; | 1625 | return ret; |
1603 | } | 1626 | } |
1604 | 1627 | ||
1628 | /* | ||
1629 | * Used by process_recorded_refs to determine if a new ref would overwrite an | ||
1630 | * already existing ref. In case it detects an overwrite, it returns the | ||
1631 | * inode/gen in who_ino/who_gen. | ||
1632 | * When an overwrite is detected, process_recorded_refs does proper orphanizing | ||
1633 | * to make sure later references to the overwritten inode are possible. | ||
1634 | * Orphanizing is however only required for the first ref of an inode. | ||
1635 | * process_recorded_refs does an additional is_first_ref check to see if | ||
1636 | * orphanizing is really required. | ||
1637 | */ | ||
1605 | static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, | 1638 | static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, |
1606 | const char *name, int name_len, | 1639 | const char *name, int name_len, |
1607 | u64 *who_ino, u64 *who_gen) | 1640 | u64 *who_ino, u64 *who_gen) |
@@ -1626,9 +1659,14 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, | |||
1626 | goto out; | 1659 | goto out; |
1627 | } | 1660 | } |
1628 | 1661 | ||
1662 | /* | ||
1663 | * Check if the overwritten ref was already processed. If yes, the ref | ||
1664 | * was already unlinked/moved, so we can safely assume that we will not | ||
1665 | * overwrite anything at this point in time. | ||
1666 | */ | ||
1629 | if (other_inode > sctx->send_progress) { | 1667 | if (other_inode > sctx->send_progress) { |
1630 | ret = get_inode_info(sctx->parent_root, other_inode, NULL, | 1668 | ret = get_inode_info(sctx->parent_root, other_inode, NULL, |
1631 | who_gen, NULL, NULL, NULL); | 1669 | who_gen, NULL, NULL, NULL, NULL); |
1632 | if (ret < 0) | 1670 | if (ret < 0) |
1633 | goto out; | 1671 | goto out; |
1634 | 1672 | ||
@@ -1642,6 +1680,13 @@ out: | |||
1642 | return ret; | 1680 | return ret; |
1643 | } | 1681 | } |
1644 | 1682 | ||
1683 | /* | ||
1684 | * Checks if the ref was overwritten by an already processed inode. This is | ||
1685 | * used by __get_cur_name_and_parent to find out if the ref was orphanized and | ||
1686 | * thus the orphan name needs to be used. | ||
1687 | * process_recorded_refs also uses it to avoid unlinking of refs that were | ||
1688 | * overwritten. | ||
1689 | */ | ||
1645 | static int did_overwrite_ref(struct send_ctx *sctx, | 1690 | static int did_overwrite_ref(struct send_ctx *sctx, |
1646 | u64 dir, u64 dir_gen, | 1691 | u64 dir, u64 dir_gen, |
1647 | u64 ino, u64 ino_gen, | 1692 | u64 ino, u64 ino_gen, |
@@ -1671,7 +1716,7 @@ static int did_overwrite_ref(struct send_ctx *sctx, | |||
1671 | } | 1716 | } |
1672 | 1717 | ||
1673 | ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL, | 1718 | ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL, |
1674 | NULL); | 1719 | NULL, NULL); |
1675 | if (ret < 0) | 1720 | if (ret < 0) |
1676 | goto out; | 1721 | goto out; |
1677 | 1722 | ||
@@ -1690,6 +1735,11 @@ out: | |||
1690 | return ret; | 1735 | return ret; |
1691 | } | 1736 | } |
1692 | 1737 | ||
1738 | /* | ||
1739 | * Same as did_overwrite_ref, but also checks if it is the first ref of an inode | ||
1740 | * that got overwritten. This is used by process_recorded_refs to determine | ||
1741 | * if it has to use the path as returned by get_cur_path or the orphan name. | ||
1742 | */ | ||
1693 | static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) | 1743 | static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) |
1694 | { | 1744 | { |
1695 | int ret = 0; | 1745 | int ret = 0; |
@@ -1710,39 +1760,40 @@ static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) | |||
1710 | 1760 | ||
1711 | ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen, | 1761 | ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen, |
1712 | name->start, fs_path_len(name)); | 1762 | name->start, fs_path_len(name)); |
1713 | if (ret < 0) | ||
1714 | goto out; | ||
1715 | 1763 | ||
1716 | out: | 1764 | out: |
1717 | fs_path_free(sctx, name); | 1765 | fs_path_free(sctx, name); |
1718 | return ret; | 1766 | return ret; |
1719 | } | 1767 | } |
1720 | 1768 | ||
1769 | /* | ||
1770 | * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit, | ||
1771 | * so we need to do some special handling in case we have clashes. This function | ||
1772 | * takes care of this with the help of name_cache_entry::radix_list. | ||
1773 | * In case of error, nce is kfreed. | ||
1774 | */ | ||
1721 | static int name_cache_insert(struct send_ctx *sctx, | 1775 | static int name_cache_insert(struct send_ctx *sctx, |
1722 | struct name_cache_entry *nce) | 1776 | struct name_cache_entry *nce) |
1723 | { | 1777 | { |
1724 | int ret = 0; | 1778 | int ret = 0; |
1725 | struct name_cache_entry **ncea; | 1779 | struct list_head *nce_head; |
1726 | 1780 | ||
1727 | ncea = radix_tree_lookup(&sctx->name_cache, nce->ino); | 1781 | nce_head = radix_tree_lookup(&sctx->name_cache, |
1728 | if (ncea) { | 1782 | (unsigned long)nce->ino); |
1729 | if (!ncea[0]) | 1783 | if (!nce_head) { |
1730 | ncea[0] = nce; | 1784 | nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); |
1731 | else if (!ncea[1]) | 1785 | if (!nce_head) |
1732 | ncea[1] = nce; | ||
1733 | else | ||
1734 | BUG(); | ||
1735 | } else { | ||
1736 | ncea = kmalloc(sizeof(void *) * 2, GFP_NOFS); | ||
1737 | if (!ncea) | ||
1738 | return -ENOMEM; | 1786 | return -ENOMEM; |
1787 | INIT_LIST_HEAD(nce_head); | ||
1739 | 1788 | ||
1740 | ncea[0] = nce; | 1789 | ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); |
1741 | ncea[1] = NULL; | 1790 | if (ret < 0) { |
1742 | ret = radix_tree_insert(&sctx->name_cache, nce->ino, ncea); | 1791 | kfree(nce_head); |
1743 | if (ret < 0) | 1792 | kfree(nce); |
1744 | return ret; | 1793 | return ret; |
1794 | } | ||
1745 | } | 1795 | } |
1796 | list_add_tail(&nce->radix_list, nce_head); | ||
1746 | list_add_tail(&nce->list, &sctx->name_cache_list); | 1797 | list_add_tail(&nce->list, &sctx->name_cache_list); |
1747 | sctx->name_cache_size++; | 1798 | sctx->name_cache_size++; |
1748 | 1799 | ||
@@ -1752,50 +1803,52 @@ static int name_cache_insert(struct send_ctx *sctx, | |||
1752 | static void name_cache_delete(struct send_ctx *sctx, | 1803 | static void name_cache_delete(struct send_ctx *sctx, |
1753 | struct name_cache_entry *nce) | 1804 | struct name_cache_entry *nce) |
1754 | { | 1805 | { |
1755 | struct name_cache_entry **ncea; | 1806 | struct list_head *nce_head; |
1756 | |||
1757 | ncea = radix_tree_lookup(&sctx->name_cache, nce->ino); | ||
1758 | BUG_ON(!ncea); | ||
1759 | |||
1760 | if (ncea[0] == nce) | ||
1761 | ncea[0] = NULL; | ||
1762 | else if (ncea[1] == nce) | ||
1763 | ncea[1] = NULL; | ||
1764 | else | ||
1765 | BUG(); | ||
1766 | 1807 | ||
1767 | if (!ncea[0] && !ncea[1]) { | 1808 | nce_head = radix_tree_lookup(&sctx->name_cache, |
1768 | radix_tree_delete(&sctx->name_cache, nce->ino); | 1809 | (unsigned long)nce->ino); |
1769 | kfree(ncea); | 1810 | BUG_ON(!nce_head); |
1770 | } | ||
1771 | 1811 | ||
1812 | list_del(&nce->radix_list); | ||
1772 | list_del(&nce->list); | 1813 | list_del(&nce->list); |
1773 | |||
1774 | sctx->name_cache_size--; | 1814 | sctx->name_cache_size--; |
1815 | |||
1816 | if (list_empty(nce_head)) { | ||
1817 | radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); | ||
1818 | kfree(nce_head); | ||
1819 | } | ||
1775 | } | 1820 | } |
1776 | 1821 | ||
1777 | static struct name_cache_entry *name_cache_search(struct send_ctx *sctx, | 1822 | static struct name_cache_entry *name_cache_search(struct send_ctx *sctx, |
1778 | u64 ino, u64 gen) | 1823 | u64 ino, u64 gen) |
1779 | { | 1824 | { |
1780 | struct name_cache_entry **ncea; | 1825 | struct list_head *nce_head; |
1826 | struct name_cache_entry *cur; | ||
1781 | 1827 | ||
1782 | ncea = radix_tree_lookup(&sctx->name_cache, ino); | 1828 | nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino); |
1783 | if (!ncea) | 1829 | if (!nce_head) |
1784 | return NULL; | 1830 | return NULL; |
1785 | 1831 | ||
1786 | if (ncea[0] && ncea[0]->gen == gen) | 1832 | list_for_each_entry(cur, nce_head, radix_list) { |
1787 | return ncea[0]; | 1833 | if (cur->ino == ino && cur->gen == gen) |
1788 | else if (ncea[1] && ncea[1]->gen == gen) | 1834 | return cur; |
1789 | return ncea[1]; | 1835 | } |
1790 | return NULL; | 1836 | return NULL; |
1791 | } | 1837 | } |
1792 | 1838 | ||
1839 | /* | ||
1840 | * Removes the entry from the list and adds it back to the end. This marks the | ||
1841 | * entry as recently used so that name_cache_clean_unused does not remove it. | ||
1842 | */ | ||
1793 | static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce) | 1843 | static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce) |
1794 | { | 1844 | { |
1795 | list_del(&nce->list); | 1845 | list_del(&nce->list); |
1796 | list_add_tail(&nce->list, &sctx->name_cache_list); | 1846 | list_add_tail(&nce->list, &sctx->name_cache_list); |
1797 | } | 1847 | } |
1798 | 1848 | ||
1849 | /* | ||
1850 | * Remove some entries from the beginning of name_cache_list. | ||
1851 | */ | ||
1799 | static void name_cache_clean_unused(struct send_ctx *sctx) | 1852 | static void name_cache_clean_unused(struct send_ctx *sctx) |
1800 | { | 1853 | { |
1801 | struct name_cache_entry *nce; | 1854 | struct name_cache_entry *nce; |
@@ -1814,13 +1867,23 @@ static void name_cache_clean_unused(struct send_ctx *sctx) | |||
1814 | static void name_cache_free(struct send_ctx *sctx) | 1867 | static void name_cache_free(struct send_ctx *sctx) |
1815 | { | 1868 | { |
1816 | struct name_cache_entry *nce; | 1869 | struct name_cache_entry *nce; |
1817 | struct name_cache_entry *tmp; | ||
1818 | 1870 | ||
1819 | list_for_each_entry_safe(nce, tmp, &sctx->name_cache_list, list) { | 1871 | while (!list_empty(&sctx->name_cache_list)) { |
1872 | nce = list_entry(sctx->name_cache_list.next, | ||
1873 | struct name_cache_entry, list); | ||
1820 | name_cache_delete(sctx, nce); | 1874 | name_cache_delete(sctx, nce); |
1875 | kfree(nce); | ||
1821 | } | 1876 | } |
1822 | } | 1877 | } |
1823 | 1878 | ||
1879 | /* | ||
1880 | * Used by get_cur_path for each ref up to the root. | ||
1881 | * Returns 0 if it succeeded. | ||
1882 | * Returns 1 if the inode is not existent or got overwritten. In that case, the | ||
1883 | * name is an orphan name. This instructs get_cur_path to stop iterating. If 1 | ||
1884 | * is returned, parent_ino/parent_gen are not guaranteed to be valid. | ||
1885 | * Returns <0 in case of error. | ||
1886 | */ | ||
1824 | static int __get_cur_name_and_parent(struct send_ctx *sctx, | 1887 | static int __get_cur_name_and_parent(struct send_ctx *sctx, |
1825 | u64 ino, u64 gen, | 1888 | u64 ino, u64 gen, |
1826 | u64 *parent_ino, | 1889 | u64 *parent_ino, |
@@ -1832,6 +1895,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
1832 | struct btrfs_path *path = NULL; | 1895 | struct btrfs_path *path = NULL; |
1833 | struct name_cache_entry *nce = NULL; | 1896 | struct name_cache_entry *nce = NULL; |
1834 | 1897 | ||
1898 | /* | ||
1899 | * First check if we already did a call to this function with the same | ||
1900 | * ino/gen. If yes, check if the cache entry is still up-to-date. If yes | ||
1901 | * return the cached result. | ||
1902 | */ | ||
1835 | nce = name_cache_search(sctx, ino, gen); | 1903 | nce = name_cache_search(sctx, ino, gen); |
1836 | if (nce) { | 1904 | if (nce) { |
1837 | if (ino < sctx->send_progress && nce->need_later_update) { | 1905 | if (ino < sctx->send_progress && nce->need_later_update) { |
@@ -1854,6 +1922,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
1854 | if (!path) | 1922 | if (!path) |
1855 | return -ENOMEM; | 1923 | return -ENOMEM; |
1856 | 1924 | ||
1925 | /* | ||
1926 | * If the inode is not existent yet, add the orphan name and return 1. | ||
1927 | * This should only happen for the parent dir that we determine in | ||
1928 | * __record_new_ref | ||
1929 | */ | ||
1857 | ret = is_inode_existent(sctx, ino, gen); | 1930 | ret = is_inode_existent(sctx, ino, gen); |
1858 | if (ret < 0) | 1931 | if (ret < 0) |
1859 | goto out; | 1932 | goto out; |
@@ -1866,6 +1939,10 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
1866 | goto out_cache; | 1939 | goto out_cache; |
1867 | } | 1940 | } |
1868 | 1941 | ||
1942 | /* | ||
1943 | * Depending on whether the inode was already processed or not, use | ||
1944 | * send_root or parent_root for ref lookup. | ||
1945 | */ | ||
1869 | if (ino < sctx->send_progress) | 1946 | if (ino < sctx->send_progress) |
1870 | ret = get_first_ref(sctx, sctx->send_root, ino, | 1947 | ret = get_first_ref(sctx, sctx->send_root, ino, |
1871 | parent_ino, parent_gen, dest); | 1948 | parent_ino, parent_gen, dest); |
@@ -1875,6 +1952,10 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
1875 | if (ret < 0) | 1952 | if (ret < 0) |
1876 | goto out; | 1953 | goto out; |
1877 | 1954 | ||
1955 | /* | ||
1956 | * Check if the ref was overwritten by an inode's ref that was processed | ||
1957 | * earlier. If yes, treat as orphan and return 1. | ||
1958 | */ | ||
1878 | ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen, | 1959 | ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen, |
1879 | dest->start, dest->end - dest->start); | 1960 | dest->start, dest->end - dest->start); |
1880 | if (ret < 0) | 1961 | if (ret < 0) |
@@ -1888,6 +1969,9 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
1888 | } | 1969 | } |
1889 | 1970 | ||
1890 | out_cache: | 1971 | out_cache: |
1972 | /* | ||
1973 | * Store the result of the lookup in the name cache. | ||
1974 | */ | ||
1891 | nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); | 1975 | nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); |
1892 | if (!nce) { | 1976 | if (!nce) { |
1893 | ret = -ENOMEM; | 1977 | ret = -ENOMEM; |
@@ -1901,7 +1985,6 @@ out_cache: | |||
1901 | nce->name_len = fs_path_len(dest); | 1985 | nce->name_len = fs_path_len(dest); |
1902 | nce->ret = ret; | 1986 | nce->ret = ret; |
1903 | strcpy(nce->name, dest->start); | 1987 | strcpy(nce->name, dest->start); |
1904 | memset(&nce->use_list, 0, sizeof(nce->use_list)); | ||
1905 | 1988 | ||
1906 | if (ino < sctx->send_progress) | 1989 | if (ino < sctx->send_progress) |
1907 | nce->need_later_update = 0; | 1990 | nce->need_later_update = 0; |
@@ -2107,9 +2190,6 @@ static int send_subvol_begin(struct send_ctx *sctx) | |||
2107 | read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); | 2190 | read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); |
2108 | btrfs_release_path(path); | 2191 | btrfs_release_path(path); |
2109 | 2192 | ||
2110 | if (ret < 0) | ||
2111 | goto out; | ||
2112 | |||
2113 | if (parent_root) { | 2193 | if (parent_root) { |
2114 | ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); | 2194 | ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); |
2115 | if (ret < 0) | 2195 | if (ret < 0) |
@@ -2276,7 +2356,7 @@ verbose_printk("btrfs: send_utimes %llu\n", ino); | |||
2276 | btrfs_inode_mtime(ii)); | 2356 | btrfs_inode_mtime(ii)); |
2277 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, | 2357 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, |
2278 | btrfs_inode_ctime(ii)); | 2358 | btrfs_inode_ctime(ii)); |
2279 | /* TODO otime? */ | 2359 | /* TODO Add otime support when the otime patches get into upstream */ |
2280 | 2360 | ||
2281 | ret = send_cmd(sctx); | 2361 | ret = send_cmd(sctx); |
2282 | 2362 | ||
@@ -2292,39 +2372,39 @@ out: | |||
2292 | * a valid path yet because we did not process the refs yet. So, the inode | 2372 | * a valid path yet because we did not process the refs yet. So, the inode |
2293 | * is created as orphan. | 2373 | * is created as orphan. |
2294 | */ | 2374 | */ |
2295 | static int send_create_inode(struct send_ctx *sctx, struct btrfs_path *path, | 2375 | static int send_create_inode(struct send_ctx *sctx, u64 ino) |
2296 | struct btrfs_key *key) | ||
2297 | { | 2376 | { |
2298 | int ret = 0; | 2377 | int ret = 0; |
2299 | struct extent_buffer *eb = path->nodes[0]; | ||
2300 | struct btrfs_inode_item *ii; | ||
2301 | struct fs_path *p; | 2378 | struct fs_path *p; |
2302 | int slot = path->slots[0]; | ||
2303 | int cmd; | 2379 | int cmd; |
2380 | u64 gen; | ||
2304 | u64 mode; | 2381 | u64 mode; |
2382 | u64 rdev; | ||
2305 | 2383 | ||
2306 | verbose_printk("btrfs: send_create_inode %llu\n", sctx->cur_ino); | 2384 | verbose_printk("btrfs: send_create_inode %llu\n", ino); |
2307 | 2385 | ||
2308 | p = fs_path_alloc(sctx); | 2386 | p = fs_path_alloc(sctx); |
2309 | if (!p) | 2387 | if (!p) |
2310 | return -ENOMEM; | 2388 | return -ENOMEM; |
2311 | 2389 | ||
2312 | ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); | 2390 | ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL, |
2313 | mode = btrfs_inode_mode(eb, ii); | 2391 | NULL, &rdev); |
2392 | if (ret < 0) | ||
2393 | goto out; | ||
2314 | 2394 | ||
2315 | if (S_ISREG(mode)) | 2395 | if (S_ISREG(mode)) { |
2316 | cmd = BTRFS_SEND_C_MKFILE; | 2396 | cmd = BTRFS_SEND_C_MKFILE; |
2317 | else if (S_ISDIR(mode)) | 2397 | } else if (S_ISDIR(mode)) { |
2318 | cmd = BTRFS_SEND_C_MKDIR; | 2398 | cmd = BTRFS_SEND_C_MKDIR; |
2319 | else if (S_ISLNK(mode)) | 2399 | } else if (S_ISLNK(mode)) { |
2320 | cmd = BTRFS_SEND_C_SYMLINK; | 2400 | cmd = BTRFS_SEND_C_SYMLINK; |
2321 | else if (S_ISCHR(mode) || S_ISBLK(mode)) | 2401 | } else if (S_ISCHR(mode) || S_ISBLK(mode)) { |
2322 | cmd = BTRFS_SEND_C_MKNOD; | 2402 | cmd = BTRFS_SEND_C_MKNOD; |
2323 | else if (S_ISFIFO(mode)) | 2403 | } else if (S_ISFIFO(mode)) { |
2324 | cmd = BTRFS_SEND_C_MKFIFO; | 2404 | cmd = BTRFS_SEND_C_MKFIFO; |
2325 | else if (S_ISSOCK(mode)) | 2405 | } else if (S_ISSOCK(mode)) { |
2326 | cmd = BTRFS_SEND_C_MKSOCK; | 2406 | cmd = BTRFS_SEND_C_MKSOCK; |
2327 | else { | 2407 | } else { |
2328 | printk(KERN_WARNING "btrfs: unexpected inode type %o", | 2408 | printk(KERN_WARNING "btrfs: unexpected inode type %o", |
2329 | (int)(mode & S_IFMT)); | 2409 | (int)(mode & S_IFMT)); |
2330 | ret = -ENOTSUPP; | 2410 | ret = -ENOTSUPP; |
@@ -2335,22 +2415,22 @@ verbose_printk("btrfs: send_create_inode %llu\n", sctx->cur_ino); | |||
2335 | if (ret < 0) | 2415 | if (ret < 0) |
2336 | goto out; | 2416 | goto out; |
2337 | 2417 | ||
2338 | ret = gen_unique_name(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); | 2418 | ret = gen_unique_name(sctx, ino, gen, p); |
2339 | if (ret < 0) | 2419 | if (ret < 0) |
2340 | goto out; | 2420 | goto out; |
2341 | 2421 | ||
2342 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | 2422 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
2343 | TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, sctx->cur_ino); | 2423 | TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino); |
2344 | 2424 | ||
2345 | if (S_ISLNK(mode)) { | 2425 | if (S_ISLNK(mode)) { |
2346 | fs_path_reset(p); | 2426 | fs_path_reset(p); |
2347 | ret = read_symlink(sctx, sctx->send_root, sctx->cur_ino, p); | 2427 | ret = read_symlink(sctx, sctx->send_root, ino, p); |
2348 | if (ret < 0) | 2428 | if (ret < 0) |
2349 | goto out; | 2429 | goto out; |
2350 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); | 2430 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); |
2351 | } else if (S_ISCHR(mode) || S_ISBLK(mode) || | 2431 | } else if (S_ISCHR(mode) || S_ISBLK(mode) || |
2352 | S_ISFIFO(mode) || S_ISSOCK(mode)) { | 2432 | S_ISFIFO(mode) || S_ISSOCK(mode)) { |
2353 | TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, btrfs_inode_rdev(eb, ii)); | 2433 | TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, rdev); |
2354 | } | 2434 | } |
2355 | 2435 | ||
2356 | ret = send_cmd(sctx); | 2436 | ret = send_cmd(sctx); |
@@ -2364,6 +2444,92 @@ out: | |||
2364 | return ret; | 2444 | return ret; |
2365 | } | 2445 | } |
2366 | 2446 | ||
2447 | /* | ||
2448 | * We need some special handling for inodes that get processed before the parent | ||
2449 | * directory got created. See process_recorded_refs for details. | ||
2450 | * This function does the check if we already created the dir out of order. | ||
2451 | */ | ||
2452 | static int did_create_dir(struct send_ctx *sctx, u64 dir) | ||
2453 | { | ||
2454 | int ret = 0; | ||
2455 | struct btrfs_path *path = NULL; | ||
2456 | struct btrfs_key key; | ||
2457 | struct btrfs_key found_key; | ||
2458 | struct btrfs_key di_key; | ||
2459 | struct extent_buffer *eb; | ||
2460 | struct btrfs_dir_item *di; | ||
2461 | int slot; | ||
2462 | |||
2463 | path = alloc_path_for_send(); | ||
2464 | if (!path) { | ||
2465 | ret = -ENOMEM; | ||
2466 | goto out; | ||
2467 | } | ||
2468 | |||
2469 | key.objectid = dir; | ||
2470 | key.type = BTRFS_DIR_INDEX_KEY; | ||
2471 | key.offset = 0; | ||
2472 | while (1) { | ||
2473 | ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, | ||
2474 | 1, 0); | ||
2475 | if (ret < 0) | ||
2476 | goto out; | ||
2477 | if (!ret) { | ||
2478 | eb = path->nodes[0]; | ||
2479 | slot = path->slots[0]; | ||
2480 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
2481 | } | ||
2482 | if (ret || found_key.objectid != key.objectid || | ||
2483 | found_key.type != key.type) { | ||
2484 | ret = 0; | ||
2485 | goto out; | ||
2486 | } | ||
2487 | |||
2488 | di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); | ||
2489 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); | ||
2490 | |||
2491 | if (di_key.objectid < sctx->send_progress) { | ||
2492 | ret = 1; | ||
2493 | goto out; | ||
2494 | } | ||
2495 | |||
2496 | key.offset = found_key.offset + 1; | ||
2497 | btrfs_release_path(path); | ||
2498 | } | ||
2499 | |||
2500 | out: | ||
2501 | btrfs_free_path(path); | ||
2502 | return ret; | ||
2503 | } | ||
2504 | |||
2505 | /* | ||
2506 | * Only creates the inode if it is: | ||
2507 | * 1. Not a directory | ||
2508 | * 2. Or a directory which was not created already due to out of order | ||
2509 | * directories. See did_create_dir and process_recorded_refs for details. | ||
2510 | */ | ||
2511 | static int send_create_inode_if_needed(struct send_ctx *sctx) | ||
2512 | { | ||
2513 | int ret; | ||
2514 | |||
2515 | if (S_ISDIR(sctx->cur_inode_mode)) { | ||
2516 | ret = did_create_dir(sctx, sctx->cur_ino); | ||
2517 | if (ret < 0) | ||
2518 | goto out; | ||
2519 | if (ret) { | ||
2520 | ret = 0; | ||
2521 | goto out; | ||
2522 | } | ||
2523 | } | ||
2524 | |||
2525 | ret = send_create_inode(sctx, sctx->cur_ino); | ||
2526 | if (ret < 0) | ||
2527 | goto out; | ||
2528 | |||
2529 | out: | ||
2530 | return ret; | ||
2531 | } | ||
2532 | |||
2367 | struct recorded_ref { | 2533 | struct recorded_ref { |
2368 | struct list_head list; | 2534 | struct list_head list; |
2369 | char *dir_path; | 2535 | char *dir_path; |
@@ -2416,13 +2582,13 @@ static int record_ref(struct list_head *head, u64 dir, | |||
2416 | static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head) | 2582 | static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head) |
2417 | { | 2583 | { |
2418 | struct recorded_ref *cur; | 2584 | struct recorded_ref *cur; |
2419 | struct recorded_ref *tmp; | ||
2420 | 2585 | ||
2421 | list_for_each_entry_safe(cur, tmp, head, list) { | 2586 | while (!list_empty(head)) { |
2587 | cur = list_entry(head->next, struct recorded_ref, list); | ||
2422 | fs_path_free(sctx, cur->full_path); | 2588 | fs_path_free(sctx, cur->full_path); |
2589 | list_del(&cur->list); | ||
2423 | kfree(cur); | 2590 | kfree(cur); |
2424 | } | 2591 | } |
2425 | INIT_LIST_HEAD(head); | ||
2426 | } | 2592 | } |
2427 | 2593 | ||
2428 | static void free_recorded_refs(struct send_ctx *sctx) | 2594 | static void free_recorded_refs(struct send_ctx *sctx) |
@@ -2432,7 +2598,7 @@ static void free_recorded_refs(struct send_ctx *sctx) | |||
2432 | } | 2598 | } |
2433 | 2599 | ||
2434 | /* | 2600 | /* |
2435 | * Renames/moves a file/dir to it's orphan name. Used when the first | 2601 | * Renames/moves a file/dir to its orphan name. Used when the first |
2436 | * ref of an unprocessed inode gets overwritten and for all non empty | 2602 | * ref of an unprocessed inode gets overwritten and for all non empty |
2437 | * directories. | 2603 | * directories. |
2438 | */ | 2604 | */ |
@@ -2472,6 +2638,12 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) | |||
2472 | struct btrfs_key loc; | 2638 | struct btrfs_key loc; |
2473 | struct btrfs_dir_item *di; | 2639 | struct btrfs_dir_item *di; |
2474 | 2640 | ||
2641 | /* | ||
2642 | * Don't try to rmdir the top/root subvolume dir. | ||
2643 | */ | ||
2644 | if (dir == BTRFS_FIRST_FREE_OBJECTID) | ||
2645 | return 0; | ||
2646 | |||
2475 | path = alloc_path_for_send(); | 2647 | path = alloc_path_for_send(); |
2476 | if (!path) | 2648 | if (!path) |
2477 | return -ENOMEM; | 2649 | return -ENOMEM; |
@@ -2513,160 +2685,6 @@ out: | |||
2513 | return ret; | 2685 | return ret; |
2514 | } | 2686 | } |
2515 | 2687 | ||
2516 | struct finish_unordered_dir_ctx { | ||
2517 | struct send_ctx *sctx; | ||
2518 | struct fs_path *cur_path; | ||
2519 | struct fs_path *dir_path; | ||
2520 | u64 dir_ino; | ||
2521 | int need_delete; | ||
2522 | int delete_pass; | ||
2523 | }; | ||
2524 | |||
2525 | int __finish_unordered_dir(int num, struct btrfs_key *di_key, | ||
2526 | const char *name, int name_len, | ||
2527 | const char *data, int data_len, | ||
2528 | u8 type, void *ctx) | ||
2529 | { | ||
2530 | int ret = 0; | ||
2531 | struct finish_unordered_dir_ctx *fctx = ctx; | ||
2532 | struct send_ctx *sctx = fctx->sctx; | ||
2533 | u64 di_gen; | ||
2534 | u64 di_mode; | ||
2535 | int is_orphan = 0; | ||
2536 | |||
2537 | if (di_key->objectid >= fctx->dir_ino) | ||
2538 | goto out; | ||
2539 | |||
2540 | fs_path_reset(fctx->cur_path); | ||
2541 | |||
2542 | ret = get_inode_info(sctx->send_root, di_key->objectid, | ||
2543 | NULL, &di_gen, &di_mode, NULL, NULL); | ||
2544 | if (ret < 0) | ||
2545 | goto out; | ||
2546 | |||
2547 | ret = is_first_ref(sctx, sctx->send_root, di_key->objectid, | ||
2548 | fctx->dir_ino, name, name_len); | ||
2549 | if (ret < 0) | ||
2550 | goto out; | ||
2551 | if (ret) { | ||
2552 | is_orphan = 1; | ||
2553 | ret = gen_unique_name(sctx, di_key->objectid, di_gen, | ||
2554 | fctx->cur_path); | ||
2555 | } else { | ||
2556 | ret = get_cur_path(sctx, di_key->objectid, di_gen, | ||
2557 | fctx->cur_path); | ||
2558 | } | ||
2559 | if (ret < 0) | ||
2560 | goto out; | ||
2561 | |||
2562 | ret = fs_path_add(fctx->dir_path, name, name_len); | ||
2563 | if (ret < 0) | ||
2564 | goto out; | ||
2565 | |||
2566 | if (!fctx->delete_pass) { | ||
2567 | if (S_ISDIR(di_mode)) { | ||
2568 | ret = send_rename(sctx, fctx->cur_path, | ||
2569 | fctx->dir_path); | ||
2570 | } else { | ||
2571 | ret = send_link(sctx, fctx->dir_path, | ||
2572 | fctx->cur_path); | ||
2573 | if (is_orphan) | ||
2574 | fctx->need_delete = 1; | ||
2575 | } | ||
2576 | } else if (!S_ISDIR(di_mode)) { | ||
2577 | ret = send_unlink(sctx, fctx->cur_path); | ||
2578 | } else { | ||
2579 | ret = 0; | ||
2580 | } | ||
2581 | |||
2582 | fs_path_remove(fctx->dir_path); | ||
2583 | |||
2584 | out: | ||
2585 | return ret; | ||
2586 | } | ||
2587 | |||
2588 | /* | ||
2589 | * Go through all dir items and see if we find refs which could not be created | ||
2590 | * in the past because the dir did not exist at that time. | ||
2591 | */ | ||
2592 | static int finish_outoforder_dir(struct send_ctx *sctx, u64 dir, u64 dir_gen) | ||
2593 | { | ||
2594 | int ret = 0; | ||
2595 | struct btrfs_path *path = NULL; | ||
2596 | struct btrfs_key key; | ||
2597 | struct btrfs_key found_key; | ||
2598 | struct extent_buffer *eb; | ||
2599 | struct finish_unordered_dir_ctx fctx; | ||
2600 | int slot; | ||
2601 | |||
2602 | path = alloc_path_for_send(); | ||
2603 | if (!path) { | ||
2604 | ret = -ENOMEM; | ||
2605 | goto out; | ||
2606 | } | ||
2607 | |||
2608 | memset(&fctx, 0, sizeof(fctx)); | ||
2609 | fctx.sctx = sctx; | ||
2610 | fctx.cur_path = fs_path_alloc(sctx); | ||
2611 | fctx.dir_path = fs_path_alloc(sctx); | ||
2612 | if (!fctx.cur_path || !fctx.dir_path) { | ||
2613 | ret = -ENOMEM; | ||
2614 | goto out; | ||
2615 | } | ||
2616 | fctx.dir_ino = dir; | ||
2617 | |||
2618 | ret = get_cur_path(sctx, dir, dir_gen, fctx.dir_path); | ||
2619 | if (ret < 0) | ||
2620 | goto out; | ||
2621 | |||
2622 | /* | ||
2623 | * We do two passes. The first links in the new refs and the second | ||
2624 | * deletes orphans if required. Deletion of orphans is not required for | ||
2625 | * directory inodes, as we always have only one ref and use rename | ||
2626 | * instead of link for those. | ||
2627 | */ | ||
2628 | |||
2629 | again: | ||
2630 | key.objectid = dir; | ||
2631 | key.type = BTRFS_DIR_ITEM_KEY; | ||
2632 | key.offset = 0; | ||
2633 | while (1) { | ||
2634 | ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, | ||
2635 | 1, 0); | ||
2636 | if (ret < 0) | ||
2637 | goto out; | ||
2638 | eb = path->nodes[0]; | ||
2639 | slot = path->slots[0]; | ||
2640 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
2641 | |||
2642 | if (found_key.objectid != key.objectid || | ||
2643 | found_key.type != key.type) { | ||
2644 | btrfs_release_path(path); | ||
2645 | break; | ||
2646 | } | ||
2647 | |||
2648 | ret = iterate_dir_item(sctx, sctx->send_root, path, | ||
2649 | &found_key, __finish_unordered_dir, | ||
2650 | &fctx); | ||
2651 | if (ret < 0) | ||
2652 | goto out; | ||
2653 | |||
2654 | key.offset = found_key.offset + 1; | ||
2655 | btrfs_release_path(path); | ||
2656 | } | ||
2657 | |||
2658 | if (!fctx.delete_pass && fctx.need_delete) { | ||
2659 | fctx.delete_pass = 1; | ||
2660 | goto again; | ||
2661 | } | ||
2662 | |||
2663 | out: | ||
2664 | btrfs_free_path(path); | ||
2665 | fs_path_free(sctx, fctx.cur_path); | ||
2666 | fs_path_free(sctx, fctx.dir_path); | ||
2667 | return ret; | ||
2668 | } | ||
2669 | |||
2670 | /* | 2688 | /* |
2671 | * This does all the move/link/unlink/rmdir magic. | 2689 | * This does all the move/link/unlink/rmdir magic. |
2672 | */ | 2690 | */ |
@@ -2674,6 +2692,7 @@ static int process_recorded_refs(struct send_ctx *sctx) | |||
2674 | { | 2692 | { |
2675 | int ret = 0; | 2693 | int ret = 0; |
2676 | struct recorded_ref *cur; | 2694 | struct recorded_ref *cur; |
2695 | struct recorded_ref *cur2; | ||
2677 | struct ulist *check_dirs = NULL; | 2696 | struct ulist *check_dirs = NULL; |
2678 | struct ulist_iterator uit; | 2697 | struct ulist_iterator uit; |
2679 | struct ulist_node *un; | 2698 | struct ulist_node *un; |
@@ -2685,6 +2704,12 @@ static int process_recorded_refs(struct send_ctx *sctx) | |||
2685 | 2704 | ||
2686 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | 2705 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); |
2687 | 2706 | ||
2707 | /* | ||
2708 | * This should never happen as the root dir always has the same ref | ||
2709 | * which is always '..' | ||
2710 | */ | ||
2711 | BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); | ||
2712 | |||
2688 | valid_path = fs_path_alloc(sctx); | 2713 | valid_path = fs_path_alloc(sctx); |
2689 | if (!valid_path) { | 2714 | if (!valid_path) { |
2690 | ret = -ENOMEM; | 2715 | ret = -ENOMEM; |
@@ -2731,6 +2756,46 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
2731 | 2756 | ||
2732 | list_for_each_entry(cur, &sctx->new_refs, list) { | 2757 | list_for_each_entry(cur, &sctx->new_refs, list) { |
2733 | /* | 2758 | /* |
2759 | * We may have refs where the parent directory does not exist | ||
2760 | * yet. This happens if the parent directories inum is higher | ||
2761 | * the the current inum. To handle this case, we create the | ||
2762 | * parent directory out of order. But we need to check if this | ||
2763 | * did already happen before due to other refs in the same dir. | ||
2764 | */ | ||
2765 | ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); | ||
2766 | if (ret < 0) | ||
2767 | goto out; | ||
2768 | if (ret == inode_state_will_create) { | ||
2769 | ret = 0; | ||
2770 | /* | ||
2771 | * First check if any of the current inode's refs did | |
2772 | * already create the dir. | ||
2773 | */ | ||
2774 | list_for_each_entry(cur2, &sctx->new_refs, list) { | ||
2775 | if (cur == cur2) | ||
2776 | break; | ||
2777 | if (cur2->dir == cur->dir) { | ||
2778 | ret = 1; | ||
2779 | break; | ||
2780 | } | ||
2781 | } | ||
2782 | |||
2783 | /* | ||
2784 | * If that did not happen, check if a previous inode | ||
2785 | * did already create the dir. | ||
2786 | */ | ||
2787 | if (!ret) | ||
2788 | ret = did_create_dir(sctx, cur->dir); | ||
2789 | if (ret < 0) | ||
2790 | goto out; | ||
2791 | if (!ret) { | ||
2792 | ret = send_create_inode(sctx, cur->dir); | ||
2793 | if (ret < 0) | ||
2794 | goto out; | ||
2795 | } | ||
2796 | } | ||
2797 | |||
2798 | /* | ||
2734 | * Check if this new ref would overwrite the first ref of | 2799 | * Check if this new ref would overwrite the first ref of |
2735 | * another unprocessed inode. If yes, orphanize the | 2800 | * another unprocessed inode. If yes, orphanize the |
2736 | * overwritten inode. If we find an overwritten ref that is | 2801 | * overwritten inode. If we find an overwritten ref that is |
@@ -2764,7 +2829,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
2764 | * inode, move it and update valid_path. If not, link or move | 2829 | * inode, move it and update valid_path. If not, link or move |
2765 | * it depending on the inode mode. | 2830 | * it depending on the inode mode. |
2766 | */ | 2831 | */ |
2767 | if (is_orphan && !sctx->cur_inode_first_ref_orphan) { | 2832 | if (is_orphan) { |
2768 | ret = send_rename(sctx, valid_path, cur->full_path); | 2833 | ret = send_rename(sctx, valid_path, cur->full_path); |
2769 | if (ret < 0) | 2834 | if (ret < 0) |
2770 | goto out; | 2835 | goto out; |
@@ -2827,6 +2892,17 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
2827 | if (ret < 0) | 2892 | if (ret < 0) |
2828 | goto out; | 2893 | goto out; |
2829 | } | 2894 | } |
2895 | } else if (S_ISDIR(sctx->cur_inode_mode) && | ||
2896 | !list_empty(&sctx->deleted_refs)) { | ||
2897 | /* | ||
2898 | * We have a moved dir. Add the old parent to check_dirs | ||
2899 | */ | ||
2900 | cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, | ||
2901 | list); | ||
2902 | ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, | ||
2903 | GFP_NOFS); | ||
2904 | if (ret < 0) | ||
2905 | goto out; | ||
2830 | } else if (!S_ISDIR(sctx->cur_inode_mode)) { | 2906 | } else if (!S_ISDIR(sctx->cur_inode_mode)) { |
2831 | /* | 2907 | /* |
2832 | * We have a non dir inode. Go through all deleted refs and | 2908 | * We have a non dir inode. Go through all deleted refs and |
@@ -2840,35 +2916,9 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
2840 | if (ret < 0) | 2916 | if (ret < 0) |
2841 | goto out; | 2917 | goto out; |
2842 | if (!ret) { | 2918 | if (!ret) { |
2843 | /* | 2919 | ret = send_unlink(sctx, cur->full_path); |
2844 | * In case the inode was moved to a directory | 2920 | if (ret < 0) |
2845 | * that was not created yet (see | 2921 | goto out; |
2846 | * __record_new_ref), we can not unlink the ref | ||
2847 | * as it will be needed later when the parent | ||
2848 | * directory is created, so that we can move in | ||
2849 | * the inode to the new dir. | ||
2850 | */ | ||
2851 | if (!is_orphan && | ||
2852 | sctx->cur_inode_first_ref_orphan) { | ||
2853 | ret = orphanize_inode(sctx, | ||
2854 | sctx->cur_ino, | ||
2855 | sctx->cur_inode_gen, | ||
2856 | cur->full_path); | ||
2857 | if (ret < 0) | ||
2858 | goto out; | ||
2859 | ret = gen_unique_name(sctx, | ||
2860 | sctx->cur_ino, | ||
2861 | sctx->cur_inode_gen, | ||
2862 | valid_path); | ||
2863 | if (ret < 0) | ||
2864 | goto out; | ||
2865 | is_orphan = 1; | ||
2866 | |||
2867 | } else { | ||
2868 | ret = send_unlink(sctx, cur->full_path); | ||
2869 | if (ret < 0) | ||
2870 | goto out; | ||
2871 | } | ||
2872 | } | 2922 | } |
2873 | ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, | 2923 | ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, |
2874 | GFP_NOFS); | 2924 | GFP_NOFS); |
@@ -2880,12 +2930,11 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
2880 | * If the inode is still orphan, unlink the orphan. This may | 2930 | * If the inode is still orphan, unlink the orphan. This may |
2881 | * happen when a previous inode did overwrite the first ref | 2931 | * happen when a previous inode did overwrite the first ref |
2882 | * of this inode and no new refs were added for the current | 2932 | * of this inode and no new refs were added for the current |
2883 | * inode. | 2933 | * inode. Unlinking does not mean that the inode is deleted in |
2884 | * We can however not delete the orphan in case the inode relies | 2934 | * all cases. There may still be links to this inode in other |
2885 | * in a directory that was not created yet (see | 2935 | * places. |
2886 | * __record_new_ref) | ||
2887 | */ | 2936 | */ |
2888 | if (is_orphan && !sctx->cur_inode_first_ref_orphan) { | 2937 | if (is_orphan) { |
2889 | ret = send_unlink(sctx, valid_path); | 2938 | ret = send_unlink(sctx, valid_path); |
2890 | if (ret < 0) | 2939 | if (ret < 0) |
2891 | goto out; | 2940 | goto out; |
@@ -2900,6 +2949,11 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
2900 | */ | 2949 | */ |
2901 | ULIST_ITER_INIT(&uit); | 2950 | ULIST_ITER_INIT(&uit); |
2902 | while ((un = ulist_next(check_dirs, &uit))) { | 2951 | while ((un = ulist_next(check_dirs, &uit))) { |
2952 | /* | ||
2953 | * In case we had refs into dirs that were not processed yet, | ||
2954 | * we don't need to do the utime and rmdir logic for these dirs. | ||
2955 | * The dir will be processed later. | ||
2956 | */ | ||
2903 | if (un->val > sctx->cur_ino) | 2957 | if (un->val > sctx->cur_ino) |
2904 | continue; | 2958 | continue; |
2905 | 2959 | ||
@@ -2929,25 +2983,6 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
2929 | } | 2983 | } |
2930 | } | 2984 | } |
2931 | 2985 | ||
2932 | /* | ||
2933 | * Current inode is now at it's new position, so we must increase | ||
2934 | * send_progress | ||
2935 | */ | ||
2936 | sctx->send_progress = sctx->cur_ino + 1; | ||
2937 | |||
2938 | /* | ||
2939 | * We may have a directory here that has pending refs which could not | ||
2940 | * be created before (because the dir did not exist before, see | ||
2941 | * __record_new_ref). finish_outoforder_dir will link/move the pending | ||
2942 | * refs. | ||
2943 | */ | ||
2944 | if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_new) { | ||
2945 | ret = finish_outoforder_dir(sctx, sctx->cur_ino, | ||
2946 | sctx->cur_inode_gen); | ||
2947 | if (ret < 0) | ||
2948 | goto out; | ||
2949 | } | ||
2950 | |||
2951 | ret = 0; | 2986 | ret = 0; |
2952 | 2987 | ||
2953 | out: | 2988 | out: |
@@ -2971,34 +3006,9 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
2971 | return -ENOMEM; | 3006 | return -ENOMEM; |
2972 | 3007 | ||
2973 | ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, | 3008 | ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, |
2974 | NULL); | 3009 | NULL, NULL); |
2975 | if (ret < 0) | ||
2976 | goto out; | ||
2977 | |||
2978 | /* | ||
2979 | * The parent may be non-existent at this point in time. This happens | ||
2980 | * if the ino of the parent dir is higher then the current ino. In this | ||
2981 | * case, we can not process this ref until the parent dir is finally | ||
2982 | * created. If we reach the parent dir later, process_recorded_refs | ||
2983 | * will go through all dir items and process the refs that could not be | ||
2984 | * processed before. In case this is the first ref, we set | ||
2985 | * cur_inode_first_ref_orphan to 1 to inform process_recorded_refs to | ||
2986 | * keep an orphan of the inode so that it later can be used for | ||
2987 | * link/move | ||
2988 | */ | ||
2989 | ret = is_inode_existent(sctx, dir, gen); | ||
2990 | if (ret < 0) | 3010 | if (ret < 0) |
2991 | goto out; | 3011 | goto out; |
2992 | if (!ret) { | ||
2993 | ret = is_first_ref(sctx, sctx->send_root, sctx->cur_ino, dir, | ||
2994 | name->start, fs_path_len(name)); | ||
2995 | if (ret < 0) | ||
2996 | goto out; | ||
2997 | if (ret) | ||
2998 | sctx->cur_inode_first_ref_orphan = 1; | ||
2999 | ret = 0; | ||
3000 | goto out; | ||
3001 | } | ||
3002 | 3012 | ||
3003 | ret = get_cur_path(sctx, dir, gen, p); | 3013 | ret = get_cur_path(sctx, dir, gen, p); |
3004 | if (ret < 0) | 3014 | if (ret < 0) |
@@ -3029,7 +3039,7 @@ static int __record_deleted_ref(int num, u64 dir, int index, | |||
3029 | return -ENOMEM; | 3039 | return -ENOMEM; |
3030 | 3040 | ||
3031 | ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, | 3041 | ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, |
3032 | NULL); | 3042 | NULL, NULL); |
3033 | if (ret < 0) | 3043 | if (ret < 0) |
3034 | goto out; | 3044 | goto out; |
3035 | 3045 | ||
@@ -3206,33 +3216,28 @@ static int process_all_refs(struct send_ctx *sctx, | |||
3206 | key.offset = 0; | 3216 | key.offset = 0; |
3207 | while (1) { | 3217 | while (1) { |
3208 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 3218 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); |
3209 | if (ret < 0) { | 3219 | if (ret < 0) |
3210 | btrfs_release_path(path); | ||
3211 | goto out; | 3220 | goto out; |
3212 | } | 3221 | if (ret) |
3213 | if (ret) { | ||
3214 | btrfs_release_path(path); | ||
3215 | break; | 3222 | break; |
3216 | } | ||
3217 | 3223 | ||
3218 | eb = path->nodes[0]; | 3224 | eb = path->nodes[0]; |
3219 | slot = path->slots[0]; | 3225 | slot = path->slots[0]; |
3220 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 3226 | btrfs_item_key_to_cpu(eb, &found_key, slot); |
3221 | 3227 | ||
3222 | if (found_key.objectid != key.objectid || | 3228 | if (found_key.objectid != key.objectid || |
3223 | found_key.type != key.type) { | 3229 | found_key.type != key.type) |
3224 | btrfs_release_path(path); | ||
3225 | break; | 3230 | break; |
3226 | } | ||
3227 | 3231 | ||
3228 | ret = iterate_inode_ref(sctx, sctx->parent_root, path, | 3232 | ret = iterate_inode_ref(sctx, root, path, &found_key, 0, cb, |
3229 | &found_key, 0, cb, sctx); | 3233 | sctx); |
3230 | btrfs_release_path(path); | 3234 | btrfs_release_path(path); |
3231 | if (ret < 0) | 3235 | if (ret < 0) |
3232 | goto out; | 3236 | goto out; |
3233 | 3237 | ||
3234 | key.offset = found_key.offset + 1; | 3238 | key.offset = found_key.offset + 1; |
3235 | } | 3239 | } |
3240 | btrfs_release_path(path); | ||
3236 | 3241 | ||
3237 | ret = process_recorded_refs(sctx); | 3242 | ret = process_recorded_refs(sctx); |
3238 | 3243 | ||
@@ -3555,7 +3560,7 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len) | |||
3555 | int ret = 0; | 3560 | int ret = 0; |
3556 | struct fs_path *p; | 3561 | struct fs_path *p; |
3557 | loff_t pos = offset; | 3562 | loff_t pos = offset; |
3558 | int readed = 0; | 3563 | int num_read = 0; |
3559 | mm_segment_t old_fs; | 3564 | mm_segment_t old_fs; |
3560 | 3565 | ||
3561 | p = fs_path_alloc(sctx); | 3566 | p = fs_path_alloc(sctx); |
@@ -3580,8 +3585,8 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); | |||
3580 | ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos); | 3585 | ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos); |
3581 | if (ret < 0) | 3586 | if (ret < 0) |
3582 | goto out; | 3587 | goto out; |
3583 | readed = ret; | 3588 | num_read = ret; |
3584 | if (!readed) | 3589 | if (!num_read) |
3585 | goto out; | 3590 | goto out; |
3586 | 3591 | ||
3587 | ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); | 3592 | ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); |
@@ -3594,7 +3599,7 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); | |||
3594 | 3599 | ||
3595 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | 3600 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
3596 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); | 3601 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); |
3597 | TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, readed); | 3602 | TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read); |
3598 | 3603 | ||
3599 | ret = send_cmd(sctx); | 3604 | ret = send_cmd(sctx); |
3600 | 3605 | ||
@@ -3604,7 +3609,7 @@ out: | |||
3604 | set_fs(old_fs); | 3609 | set_fs(old_fs); |
3605 | if (ret < 0) | 3610 | if (ret < 0) |
3606 | return ret; | 3611 | return ret; |
3607 | return readed; | 3612 | return num_read; |
3608 | } | 3613 | } |
3609 | 3614 | ||
3610 | /* | 3615 | /* |
@@ -3615,7 +3620,6 @@ static int send_clone(struct send_ctx *sctx, | |||
3615 | struct clone_root *clone_root) | 3620 | struct clone_root *clone_root) |
3616 | { | 3621 | { |
3617 | int ret = 0; | 3622 | int ret = 0; |
3618 | struct btrfs_root *clone_root2 = clone_root->root; | ||
3619 | struct fs_path *p; | 3623 | struct fs_path *p; |
3620 | u64 gen; | 3624 | u64 gen; |
3621 | 3625 | ||
@@ -3640,22 +3644,23 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " | |||
3640 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); | 3644 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); |
3641 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | 3645 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
3642 | 3646 | ||
3643 | if (clone_root2 == sctx->send_root) { | 3647 | if (clone_root->root == sctx->send_root) { |
3644 | ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, | 3648 | ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, |
3645 | &gen, NULL, NULL, NULL); | 3649 | &gen, NULL, NULL, NULL, NULL); |
3646 | if (ret < 0) | 3650 | if (ret < 0) |
3647 | goto out; | 3651 | goto out; |
3648 | ret = get_cur_path(sctx, clone_root->ino, gen, p); | 3652 | ret = get_cur_path(sctx, clone_root->ino, gen, p); |
3649 | } else { | 3653 | } else { |
3650 | ret = get_inode_path(sctx, clone_root2, clone_root->ino, p); | 3654 | ret = get_inode_path(sctx, clone_root->root, |
3655 | clone_root->ino, p); | ||
3651 | } | 3656 | } |
3652 | if (ret < 0) | 3657 | if (ret < 0) |
3653 | goto out; | 3658 | goto out; |
3654 | 3659 | ||
3655 | TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, | 3660 | TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, |
3656 | clone_root2->root_item.uuid); | 3661 | clone_root->root->root_item.uuid); |
3657 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, | 3662 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, |
3658 | clone_root2->root_item.ctransid); | 3663 | clone_root->root->root_item.ctransid); |
3659 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); | 3664 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); |
3660 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, | 3665 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, |
3661 | clone_root->offset); | 3666 | clone_root->offset); |
@@ -3684,10 +3689,17 @@ static int send_write_or_clone(struct send_ctx *sctx, | |||
3684 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | 3689 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], |
3685 | struct btrfs_file_extent_item); | 3690 | struct btrfs_file_extent_item); |
3686 | type = btrfs_file_extent_type(path->nodes[0], ei); | 3691 | type = btrfs_file_extent_type(path->nodes[0], ei); |
3687 | if (type == BTRFS_FILE_EXTENT_INLINE) | 3692 | if (type == BTRFS_FILE_EXTENT_INLINE) { |
3688 | len = btrfs_file_extent_inline_len(path->nodes[0], ei); | 3693 | len = btrfs_file_extent_inline_len(path->nodes[0], ei); |
3689 | else | 3694 | /* |
3695 | * it is possible the inline item won't cover the whole page, | ||
3696 | * but there may be items after this page. Make | ||
3697 | * sure to send the whole thing | ||
3698 | */ | ||
3699 | len = PAGE_CACHE_ALIGN(len); | ||
3700 | } else { | ||
3690 | len = btrfs_file_extent_num_bytes(path->nodes[0], ei); | 3701 | len = btrfs_file_extent_num_bytes(path->nodes[0], ei); |
3702 | } | ||
3691 | 3703 | ||
3692 | if (offset + len > sctx->cur_inode_size) | 3704 | if (offset + len > sctx->cur_inode_size) |
3693 | len = sctx->cur_inode_size - offset; | 3705 | len = sctx->cur_inode_size - offset; |
@@ -3735,6 +3747,8 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
3735 | u64 left_offset_fixed; | 3747 | u64 left_offset_fixed; |
3736 | u64 left_len; | 3748 | u64 left_len; |
3737 | u64 right_len; | 3749 | u64 right_len; |
3750 | u64 left_gen; | ||
3751 | u64 right_gen; | ||
3738 | u8 left_type; | 3752 | u8 left_type; |
3739 | u8 right_type; | 3753 | u8 right_type; |
3740 | 3754 | ||
@@ -3744,17 +3758,17 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
3744 | 3758 | ||
3745 | eb = left_path->nodes[0]; | 3759 | eb = left_path->nodes[0]; |
3746 | slot = left_path->slots[0]; | 3760 | slot = left_path->slots[0]; |
3747 | |||
3748 | ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | 3761 | ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); |
3749 | left_type = btrfs_file_extent_type(eb, ei); | 3762 | left_type = btrfs_file_extent_type(eb, ei); |
3750 | left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); | ||
3751 | left_len = btrfs_file_extent_num_bytes(eb, ei); | ||
3752 | left_offset = btrfs_file_extent_offset(eb, ei); | ||
3753 | 3763 | ||
3754 | if (left_type != BTRFS_FILE_EXTENT_REG) { | 3764 | if (left_type != BTRFS_FILE_EXTENT_REG) { |
3755 | ret = 0; | 3765 | ret = 0; |
3756 | goto out; | 3766 | goto out; |
3757 | } | 3767 | } |
3768 | left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); | ||
3769 | left_len = btrfs_file_extent_num_bytes(eb, ei); | ||
3770 | left_offset = btrfs_file_extent_offset(eb, ei); | ||
3771 | left_gen = btrfs_file_extent_generation(eb, ei); | ||
3758 | 3772 | ||
3759 | /* | 3773 | /* |
3760 | * Following comments will refer to these graphics. L is the left | 3774 | * Following comments will refer to these graphics. L is the left |
@@ -3810,6 +3824,7 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
3810 | right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); | 3824 | right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); |
3811 | right_len = btrfs_file_extent_num_bytes(eb, ei); | 3825 | right_len = btrfs_file_extent_num_bytes(eb, ei); |
3812 | right_offset = btrfs_file_extent_offset(eb, ei); | 3826 | right_offset = btrfs_file_extent_offset(eb, ei); |
3827 | right_gen = btrfs_file_extent_generation(eb, ei); | ||
3813 | 3828 | ||
3814 | if (right_type != BTRFS_FILE_EXTENT_REG) { | 3829 | if (right_type != BTRFS_FILE_EXTENT_REG) { |
3815 | ret = 0; | 3830 | ret = 0; |
@@ -3820,7 +3835,7 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
3820 | * Are we at extent 8? If yes, we know the extent is changed. | 3835 | * Are we at extent 8? If yes, we know the extent is changed. |
3821 | * This may only happen on the first iteration. | 3836 | * This may only happen on the first iteration. |
3822 | */ | 3837 | */ |
3823 | if (found_key.offset + right_len < ekey->offset) { | 3838 | if (found_key.offset + right_len <= ekey->offset) { |
3824 | ret = 0; | 3839 | ret = 0; |
3825 | goto out; | 3840 | goto out; |
3826 | } | 3841 | } |
@@ -3837,8 +3852,9 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
3837 | /* | 3852 | /* |
3838 | * Check if we have the same extent. | 3853 | * Check if we have the same extent. |
3839 | */ | 3854 | */ |
3840 | if (left_disknr + left_offset_fixed != | 3855 | if (left_disknr != right_disknr || |
3841 | right_disknr + right_offset) { | 3856 | left_offset_fixed != right_offset || |
3857 | left_gen != right_gen) { | ||
3842 | ret = 0; | 3858 | ret = 0; |
3843 | goto out; | 3859 | goto out; |
3844 | } | 3860 | } |
@@ -3977,6 +3993,15 @@ static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end) | |||
3977 | goto out; | 3993 | goto out; |
3978 | 3994 | ||
3979 | ret = process_recorded_refs(sctx); | 3995 | ret = process_recorded_refs(sctx); |
3996 | if (ret < 0) | ||
3997 | goto out; | ||
3998 | |||
3999 | /* | ||
4000 | * We have processed the refs and thus need to advance send_progress. | ||
4001 | * Now, calls to get_cur_xxx will take the updated refs of the current | ||
4002 | * inode into account. | ||
4003 | */ | ||
4004 | sctx->send_progress = sctx->cur_ino + 1; | ||
3980 | 4005 | ||
3981 | out: | 4006 | out: |
3982 | return ret; | 4007 | return ret; |
@@ -4004,7 +4029,7 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) | |||
4004 | goto out; | 4029 | goto out; |
4005 | 4030 | ||
4006 | ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, | 4031 | ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, |
4007 | &left_mode, &left_uid, &left_gid); | 4032 | &left_mode, &left_uid, &left_gid, NULL); |
4008 | if (ret < 0) | 4033 | if (ret < 0) |
4009 | goto out; | 4034 | goto out; |
4010 | 4035 | ||
@@ -4015,7 +4040,7 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) | |||
4015 | } else { | 4040 | } else { |
4016 | ret = get_inode_info(sctx->parent_root, sctx->cur_ino, | 4041 | ret = get_inode_info(sctx->parent_root, sctx->cur_ino, |
4017 | NULL, NULL, &right_mode, &right_uid, | 4042 | NULL, NULL, &right_mode, &right_uid, |
4018 | &right_gid); | 4043 | &right_gid, NULL); |
4019 | if (ret < 0) | 4044 | if (ret < 0) |
4020 | goto out; | 4045 | goto out; |
4021 | 4046 | ||
@@ -4074,7 +4099,12 @@ static int changed_inode(struct send_ctx *sctx, | |||
4074 | 4099 | ||
4075 | sctx->cur_ino = key->objectid; | 4100 | sctx->cur_ino = key->objectid; |
4076 | sctx->cur_inode_new_gen = 0; | 4101 | sctx->cur_inode_new_gen = 0; |
4077 | sctx->cur_inode_first_ref_orphan = 0; | 4102 | |
4103 | /* | ||
4104 | * Set send_progress to current inode. This will tell all get_cur_xxx | ||
4105 | * functions that the current inode's refs are not updated yet. Later, | ||
4106 | * when process_recorded_refs is finished, it is set to cur_ino + 1. | ||
4107 | */ | ||
4078 | sctx->send_progress = sctx->cur_ino; | 4108 | sctx->send_progress = sctx->cur_ino; |
4079 | 4109 | ||
4080 | if (result == BTRFS_COMPARE_TREE_NEW || | 4110 | if (result == BTRFS_COMPARE_TREE_NEW || |
@@ -4098,7 +4128,14 @@ static int changed_inode(struct send_ctx *sctx, | |||
4098 | 4128 | ||
4099 | right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], | 4129 | right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], |
4100 | right_ii); | 4130 | right_ii); |
4101 | if (left_gen != right_gen) | 4131 | |
4132 | /* | ||
4133 | * The cur_ino = root dir case is special here. We can't treat | ||
4134 | * the inode as deleted+reused because it would generate a | ||
4135 | * stream that tries to delete/mkdir the root dir. | ||
4136 | */ | ||
4137 | if (left_gen != right_gen && | ||
4138 | sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
4102 | sctx->cur_inode_new_gen = 1; | 4139 | sctx->cur_inode_new_gen = 1; |
4103 | } | 4140 | } |
4104 | 4141 | ||
@@ -4111,8 +4148,7 @@ static int changed_inode(struct send_ctx *sctx, | |||
4111 | sctx->cur_inode_mode = btrfs_inode_mode( | 4148 | sctx->cur_inode_mode = btrfs_inode_mode( |
4112 | sctx->left_path->nodes[0], left_ii); | 4149 | sctx->left_path->nodes[0], left_ii); |
4113 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) | 4150 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) |
4114 | ret = send_create_inode(sctx, sctx->left_path, | 4151 | ret = send_create_inode_if_needed(sctx); |
4115 | sctx->cmp_key); | ||
4116 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { | 4152 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { |
4117 | sctx->cur_inode_gen = right_gen; | 4153 | sctx->cur_inode_gen = right_gen; |
4118 | sctx->cur_inode_new = 0; | 4154 | sctx->cur_inode_new = 0; |
@@ -4122,7 +4158,17 @@ static int changed_inode(struct send_ctx *sctx, | |||
4122 | sctx->cur_inode_mode = btrfs_inode_mode( | 4158 | sctx->cur_inode_mode = btrfs_inode_mode( |
4123 | sctx->right_path->nodes[0], right_ii); | 4159 | sctx->right_path->nodes[0], right_ii); |
4124 | } else if (result == BTRFS_COMPARE_TREE_CHANGED) { | 4160 | } else if (result == BTRFS_COMPARE_TREE_CHANGED) { |
4161 | /* | ||
4162 | * We need to do some special handling in case the inode was | ||
4163 | * reported as changed with a changed generation number. This | ||
4164 | * means that the original inode was deleted and new inode | ||
4165 | * reused the same inum. So we have to treat the old inode as | ||
4166 | * deleted and the new one as new. | ||
4167 | */ | ||
4125 | if (sctx->cur_inode_new_gen) { | 4168 | if (sctx->cur_inode_new_gen) { |
4169 | /* | ||
4170 | * First, process the inode as if it was deleted. | ||
4171 | */ | ||
4126 | sctx->cur_inode_gen = right_gen; | 4172 | sctx->cur_inode_gen = right_gen; |
4127 | sctx->cur_inode_new = 0; | 4173 | sctx->cur_inode_new = 0; |
4128 | sctx->cur_inode_deleted = 1; | 4174 | sctx->cur_inode_deleted = 1; |
@@ -4135,6 +4181,9 @@ static int changed_inode(struct send_ctx *sctx, | |||
4135 | if (ret < 0) | 4181 | if (ret < 0) |
4136 | goto out; | 4182 | goto out; |
4137 | 4183 | ||
4184 | /* | ||
4185 | * Now process the inode as if it was new. | ||
4186 | */ | ||
4138 | sctx->cur_inode_gen = left_gen; | 4187 | sctx->cur_inode_gen = left_gen; |
4139 | sctx->cur_inode_new = 1; | 4188 | sctx->cur_inode_new = 1; |
4140 | sctx->cur_inode_deleted = 0; | 4189 | sctx->cur_inode_deleted = 0; |
@@ -4142,14 +4191,23 @@ static int changed_inode(struct send_ctx *sctx, | |||
4142 | sctx->left_path->nodes[0], left_ii); | 4191 | sctx->left_path->nodes[0], left_ii); |
4143 | sctx->cur_inode_mode = btrfs_inode_mode( | 4192 | sctx->cur_inode_mode = btrfs_inode_mode( |
4144 | sctx->left_path->nodes[0], left_ii); | 4193 | sctx->left_path->nodes[0], left_ii); |
4145 | ret = send_create_inode(sctx, sctx->left_path, | 4194 | ret = send_create_inode_if_needed(sctx); |
4146 | sctx->cmp_key); | ||
4147 | if (ret < 0) | 4195 | if (ret < 0) |
4148 | goto out; | 4196 | goto out; |
4149 | 4197 | ||
4150 | ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW); | 4198 | ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW); |
4151 | if (ret < 0) | 4199 | if (ret < 0) |
4152 | goto out; | 4200 | goto out; |
4201 | /* | ||
4202 | * Advance send_progress now as we did not get into | ||
4203 | * process_recorded_refs_if_needed in the new_gen case. | ||
4204 | */ | ||
4205 | sctx->send_progress = sctx->cur_ino + 1; | ||
4206 | |||
4207 | /* | ||
4208 | * Now process all extents and xattrs of the inode as if | ||
4209 | * they were all new. | ||
4210 | */ | ||
4153 | ret = process_all_extents(sctx); | 4211 | ret = process_all_extents(sctx); |
4154 | if (ret < 0) | 4212 | if (ret < 0) |
4155 | goto out; | 4213 | goto out; |
@@ -4172,6 +4230,16 @@ out: | |||
4172 | return ret; | 4230 | return ret; |
4173 | } | 4231 | } |
4174 | 4232 | ||
4233 | /* | ||
4234 | * We have to process new refs before deleted refs, but compare_trees gives us | ||
4235 | * the new and deleted refs mixed. To fix this, we record the new/deleted refs | ||
4236 | * first and later process them in process_recorded_refs. | ||
4237 | * For the cur_inode_new_gen case, we skip recording completely because | ||
4238 | * changed_inode did already initiate processing of refs. The reason for this is | ||
4239 | * that in this case, compare_tree actually compares the refs of 2 different | ||
4240 | * inodes. To fix this, process_all_refs is used in changed_inode to handle all | ||
4241 | * refs of the right tree as deleted and all refs of the left tree as new. | ||
4242 | */ | ||
4175 | static int changed_ref(struct send_ctx *sctx, | 4243 | static int changed_ref(struct send_ctx *sctx, |
4176 | enum btrfs_compare_tree_result result) | 4244 | enum btrfs_compare_tree_result result) |
4177 | { | 4245 | { |
@@ -4192,6 +4260,11 @@ static int changed_ref(struct send_ctx *sctx, | |||
4192 | return ret; | 4260 | return ret; |
4193 | } | 4261 | } |
4194 | 4262 | ||
4263 | /* | ||
4264 | * Process new/deleted/changed xattrs. We skip processing in the | ||
4265 | * cur_inode_new_gen case because changed_inode did already initiate processing | ||
4266 | * of xattrs. The reason is the same as in changed_ref | ||
4267 | */ | ||
4195 | static int changed_xattr(struct send_ctx *sctx, | 4268 | static int changed_xattr(struct send_ctx *sctx, |
4196 | enum btrfs_compare_tree_result result) | 4269 | enum btrfs_compare_tree_result result) |
4197 | { | 4270 | { |
@@ -4211,6 +4284,11 @@ static int changed_xattr(struct send_ctx *sctx, | |||
4211 | return ret; | 4284 | return ret; |
4212 | } | 4285 | } |
4213 | 4286 | ||
4287 | /* | ||
4288 | * Process new/deleted/changed extents. We skip processing in the | ||
4289 | * cur_inode_new_gen case because changed_inode did already initiate processing | ||
4290 | * of extents. The reason is the same as in changed_ref | ||
4291 | */ | ||
4214 | static int changed_extent(struct send_ctx *sctx, | 4292 | static int changed_extent(struct send_ctx *sctx, |
4215 | enum btrfs_compare_tree_result result) | 4293 | enum btrfs_compare_tree_result result) |
4216 | { | 4294 | { |
@@ -4227,7 +4305,10 @@ static int changed_extent(struct send_ctx *sctx, | |||
4227 | return ret; | 4305 | return ret; |
4228 | } | 4306 | } |
4229 | 4307 | ||
4230 | 4308 | /* | |
4309 | * Updates compare related fields in sctx and simply forwards to the actual | ||
4310 | * changed_xxx functions. | ||
4311 | */ | ||
4231 | static int changed_cb(struct btrfs_root *left_root, | 4312 | static int changed_cb(struct btrfs_root *left_root, |
4232 | struct btrfs_root *right_root, | 4313 | struct btrfs_root *right_root, |
4233 | struct btrfs_path *left_path, | 4314 | struct btrfs_path *left_path, |
@@ -4247,6 +4328,11 @@ static int changed_cb(struct btrfs_root *left_root, | |||
4247 | if (ret < 0) | 4328 | if (ret < 0) |
4248 | goto out; | 4329 | goto out; |
4249 | 4330 | ||
4331 | /* Ignore non-FS objects */ | ||
4332 | if (key->objectid == BTRFS_FREE_INO_OBJECTID || | ||
4333 | key->objectid == BTRFS_FREE_SPACE_OBJECTID) | ||
4334 | goto out; | ||
4335 | |||
4250 | if (key->type == BTRFS_INODE_ITEM_KEY) | 4336 | if (key->type == BTRFS_INODE_ITEM_KEY) |
4251 | ret = changed_inode(sctx, result); | 4337 | ret = changed_inode(sctx, result); |
4252 | else if (key->type == BTRFS_INODE_REF_KEY) | 4338 | else if (key->type == BTRFS_INODE_REF_KEY) |
@@ -4299,7 +4385,8 @@ join_trans: | |||
4299 | } | 4385 | } |
4300 | 4386 | ||
4301 | /* | 4387 | /* |
4302 | * Make sure the tree has not changed | 4388 | * Make sure the tree has not changed after re-joining. We detect this |
4389 | * by comparing start_ctransid and ctransid. They should always match. | ||
4303 | */ | 4390 | */ |
4304 | spin_lock(&send_root->root_times_lock); | 4391 | spin_lock(&send_root->root_times_lock); |
4305 | ctransid = btrfs_root_ctransid(&send_root->root_item); | 4392 | ctransid = btrfs_root_ctransid(&send_root->root_item); |
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 9934e948e57f..1bf4f32fd4ef 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h | |||
@@ -130,4 +130,5 @@ enum { | |||
130 | 130 | ||
131 | #ifdef __KERNEL__ | 131 | #ifdef __KERNEL__ |
132 | long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); | 132 | long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); |
133 | int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off); | ||
133 | #endif | 134 | #endif |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 83d6f9f9c220..915ac14c2064 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -243,12 +243,18 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, | |||
243 | struct btrfs_root *root, const char *function, | 243 | struct btrfs_root *root, const char *function, |
244 | unsigned int line, int errno) | 244 | unsigned int line, int errno) |
245 | { | 245 | { |
246 | WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted"); | 246 | WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted\n"); |
247 | trans->aborted = errno; | 247 | trans->aborted = errno; |
248 | /* Nothing used. The other threads that have joined this | 248 | /* Nothing used. The other threads that have joined this |
249 | * transaction may be able to continue. */ | 249 | * transaction may be able to continue. */ |
250 | if (!trans->blocks_used) { | 250 | if (!trans->blocks_used) { |
251 | btrfs_printk(root->fs_info, "Aborting unused transaction.\n"); | 251 | char nbuf[16]; |
252 | const char *errstr; | ||
253 | |||
254 | errstr = btrfs_decode_error(root->fs_info, errno, nbuf); | ||
255 | btrfs_printk(root->fs_info, | ||
256 | "%s:%d: Aborting unused transaction(%s).\n", | ||
257 | function, line, errstr); | ||
252 | return; | 258 | return; |
253 | } | 259 | } |
254 | trans->transaction->aborted = errno; | 260 | trans->transaction->aborted = errno; |
@@ -407,7 +413,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
407 | btrfs_set_opt(info->mount_opt, NODATASUM); | 413 | btrfs_set_opt(info->mount_opt, NODATASUM); |
408 | break; | 414 | break; |
409 | case Opt_nodatacow: | 415 | case Opt_nodatacow: |
410 | printk(KERN_INFO "btrfs: setting nodatacow\n"); | 416 | if (!btrfs_test_opt(root, COMPRESS) || |
417 | !btrfs_test_opt(root, FORCE_COMPRESS)) { | ||
418 | printk(KERN_INFO "btrfs: setting nodatacow, compression disabled\n"); | ||
419 | } else { | ||
420 | printk(KERN_INFO "btrfs: setting nodatacow\n"); | ||
421 | } | ||
422 | info->compress_type = BTRFS_COMPRESS_NONE; | ||
423 | btrfs_clear_opt(info->mount_opt, COMPRESS); | ||
424 | btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); | ||
411 | btrfs_set_opt(info->mount_opt, NODATACOW); | 425 | btrfs_set_opt(info->mount_opt, NODATACOW); |
412 | btrfs_set_opt(info->mount_opt, NODATASUM); | 426 | btrfs_set_opt(info->mount_opt, NODATASUM); |
413 | break; | 427 | break; |
@@ -422,10 +436,14 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
422 | compress_type = "zlib"; | 436 | compress_type = "zlib"; |
423 | info->compress_type = BTRFS_COMPRESS_ZLIB; | 437 | info->compress_type = BTRFS_COMPRESS_ZLIB; |
424 | btrfs_set_opt(info->mount_opt, COMPRESS); | 438 | btrfs_set_opt(info->mount_opt, COMPRESS); |
439 | btrfs_clear_opt(info->mount_opt, NODATACOW); | ||
440 | btrfs_clear_opt(info->mount_opt, NODATASUM); | ||
425 | } else if (strcmp(args[0].from, "lzo") == 0) { | 441 | } else if (strcmp(args[0].from, "lzo") == 0) { |
426 | compress_type = "lzo"; | 442 | compress_type = "lzo"; |
427 | info->compress_type = BTRFS_COMPRESS_LZO; | 443 | info->compress_type = BTRFS_COMPRESS_LZO; |
428 | btrfs_set_opt(info->mount_opt, COMPRESS); | 444 | btrfs_set_opt(info->mount_opt, COMPRESS); |
445 | btrfs_clear_opt(info->mount_opt, NODATACOW); | ||
446 | btrfs_clear_opt(info->mount_opt, NODATASUM); | ||
429 | btrfs_set_fs_incompat(info, COMPRESS_LZO); | 447 | btrfs_set_fs_incompat(info, COMPRESS_LZO); |
430 | } else if (strncmp(args[0].from, "no", 2) == 0) { | 448 | } else if (strncmp(args[0].from, "no", 2) == 0) { |
431 | compress_type = "no"; | 449 | compress_type = "no"; |
@@ -543,11 +561,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
543 | btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); | 561 | btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); |
544 | break; | 562 | break; |
545 | case Opt_defrag: | 563 | case Opt_defrag: |
546 | printk(KERN_INFO "btrfs: enabling auto defrag"); | 564 | printk(KERN_INFO "btrfs: enabling auto defrag\n"); |
547 | btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); | 565 | btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); |
548 | break; | 566 | break; |
549 | case Opt_recovery: | 567 | case Opt_recovery: |
550 | printk(KERN_INFO "btrfs: enabling auto recovery"); | 568 | printk(KERN_INFO "btrfs: enabling auto recovery\n"); |
551 | btrfs_set_opt(info->mount_opt, RECOVERY); | 569 | btrfs_set_opt(info->mount_opt, RECOVERY); |
552 | break; | 570 | break; |
553 | case Opt_skip_balance: | 571 | case Opt_skip_balance: |
@@ -846,18 +864,15 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
846 | return 0; | 864 | return 0; |
847 | } | 865 | } |
848 | 866 | ||
849 | btrfs_wait_ordered_extents(root, 0, 0); | 867 | btrfs_wait_ordered_extents(root, 0); |
850 | |||
851 | spin_lock(&fs_info->trans_lock); | ||
852 | if (!fs_info->running_transaction) { | ||
853 | spin_unlock(&fs_info->trans_lock); | ||
854 | return 0; | ||
855 | } | ||
856 | spin_unlock(&fs_info->trans_lock); | ||
857 | 868 | ||
858 | trans = btrfs_join_transaction(root); | 869 | trans = btrfs_attach_transaction(root); |
859 | if (IS_ERR(trans)) | 870 | if (IS_ERR(trans)) { |
871 | /* no transaction, don't bother */ | ||
872 | if (PTR_ERR(trans) == -ENOENT) | ||
873 | return 0; | ||
860 | return PTR_ERR(trans); | 874 | return PTR_ERR(trans); |
875 | } | ||
861 | return btrfs_commit_transaction(trans, root); | 876 | return btrfs_commit_transaction(trans, root); |
862 | } | 877 | } |
863 | 878 | ||
@@ -1508,17 +1523,21 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | |||
1508 | 1523 | ||
1509 | static int btrfs_freeze(struct super_block *sb) | 1524 | static int btrfs_freeze(struct super_block *sb) |
1510 | { | 1525 | { |
1511 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | 1526 | struct btrfs_trans_handle *trans; |
1512 | mutex_lock(&fs_info->transaction_kthread_mutex); | 1527 | struct btrfs_root *root = btrfs_sb(sb)->tree_root; |
1513 | mutex_lock(&fs_info->cleaner_mutex); | 1528 | |
1514 | return 0; | 1529 | trans = btrfs_attach_transaction(root); |
1530 | if (IS_ERR(trans)) { | ||
1531 | /* no transaction, don't bother */ | ||
1532 | if (PTR_ERR(trans) == -ENOENT) | ||
1533 | return 0; | ||
1534 | return PTR_ERR(trans); | ||
1535 | } | ||
1536 | return btrfs_commit_transaction(trans, root); | ||
1515 | } | 1537 | } |
1516 | 1538 | ||
1517 | static int btrfs_unfreeze(struct super_block *sb) | 1539 | static int btrfs_unfreeze(struct super_block *sb) |
1518 | { | 1540 | { |
1519 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | ||
1520 | mutex_unlock(&fs_info->cleaner_mutex); | ||
1521 | mutex_unlock(&fs_info->transaction_kthread_mutex); | ||
1522 | return 0; | 1541 | return 0; |
1523 | } | 1542 | } |
1524 | 1543 | ||
@@ -1595,7 +1614,7 @@ static int btrfs_interface_init(void) | |||
1595 | static void btrfs_interface_exit(void) | 1614 | static void btrfs_interface_exit(void) |
1596 | { | 1615 | { |
1597 | if (misc_deregister(&btrfs_misc) < 0) | 1616 | if (misc_deregister(&btrfs_misc) < 0) |
1598 | printk(KERN_INFO "misc_deregister failed for control device"); | 1617 | printk(KERN_INFO "btrfs: misc_deregister failed for control device\n"); |
1599 | } | 1618 | } |
1600 | 1619 | ||
1601 | static int __init init_btrfs_fs(void) | 1620 | static int __init init_btrfs_fs(void) |
@@ -1620,10 +1639,14 @@ static int __init init_btrfs_fs(void) | |||
1620 | if (err) | 1639 | if (err) |
1621 | goto free_extent_io; | 1640 | goto free_extent_io; |
1622 | 1641 | ||
1623 | err = btrfs_delayed_inode_init(); | 1642 | err = ordered_data_init(); |
1624 | if (err) | 1643 | if (err) |
1625 | goto free_extent_map; | 1644 | goto free_extent_map; |
1626 | 1645 | ||
1646 | err = btrfs_delayed_inode_init(); | ||
1647 | if (err) | ||
1648 | goto free_ordered_data; | ||
1649 | |||
1627 | err = btrfs_interface_init(); | 1650 | err = btrfs_interface_init(); |
1628 | if (err) | 1651 | if (err) |
1629 | goto free_delayed_inode; | 1652 | goto free_delayed_inode; |
@@ -1641,6 +1664,8 @@ unregister_ioctl: | |||
1641 | btrfs_interface_exit(); | 1664 | btrfs_interface_exit(); |
1642 | free_delayed_inode: | 1665 | free_delayed_inode: |
1643 | btrfs_delayed_inode_exit(); | 1666 | btrfs_delayed_inode_exit(); |
1667 | free_ordered_data: | ||
1668 | ordered_data_exit(); | ||
1644 | free_extent_map: | 1669 | free_extent_map: |
1645 | extent_map_exit(); | 1670 | extent_map_exit(); |
1646 | free_extent_io: | 1671 | free_extent_io: |
@@ -1657,6 +1682,7 @@ static void __exit exit_btrfs_fs(void) | |||
1657 | { | 1682 | { |
1658 | btrfs_destroy_cachep(); | 1683 | btrfs_destroy_cachep(); |
1659 | btrfs_delayed_inode_exit(); | 1684 | btrfs_delayed_inode_exit(); |
1685 | ordered_data_exit(); | ||
1660 | extent_map_exit(); | 1686 | extent_map_exit(); |
1661 | extent_io_exit(); | 1687 | extent_io_exit(); |
1662 | btrfs_interface_exit(); | 1688 | btrfs_interface_exit(); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 27c26004e050..77db875b5116 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -53,7 +53,7 @@ static noinline void switch_commit_root(struct btrfs_root *root) | |||
53 | /* | 53 | /* |
54 | * either allocate a new transaction or hop into the existing one | 54 | * either allocate a new transaction or hop into the existing one |
55 | */ | 55 | */ |
56 | static noinline int join_transaction(struct btrfs_root *root, int nofail) | 56 | static noinline int join_transaction(struct btrfs_root *root, int type) |
57 | { | 57 | { |
58 | struct btrfs_transaction *cur_trans; | 58 | struct btrfs_transaction *cur_trans; |
59 | struct btrfs_fs_info *fs_info = root->fs_info; | 59 | struct btrfs_fs_info *fs_info = root->fs_info; |
@@ -67,7 +67,13 @@ loop: | |||
67 | } | 67 | } |
68 | 68 | ||
69 | if (fs_info->trans_no_join) { | 69 | if (fs_info->trans_no_join) { |
70 | if (!nofail) { | 70 | /* |
71 | * If we are JOIN_NOLOCK we're already committing a current | ||
72 | * transaction, we just need a handle to deal with something | ||
73 | * when committing the transaction, such as inode cache and | ||
74 | * space cache. It is a special case. | ||
75 | */ | ||
76 | if (type != TRANS_JOIN_NOLOCK) { | ||
71 | spin_unlock(&fs_info->trans_lock); | 77 | spin_unlock(&fs_info->trans_lock); |
72 | return -EBUSY; | 78 | return -EBUSY; |
73 | } | 79 | } |
@@ -87,6 +93,13 @@ loop: | |||
87 | } | 93 | } |
88 | spin_unlock(&fs_info->trans_lock); | 94 | spin_unlock(&fs_info->trans_lock); |
89 | 95 | ||
96 | /* | ||
97 | * If we are ATTACH, we just want to catch the current transaction, | ||
98 | * and commit it. If there is no transaction, just return ENOENT. | ||
99 | */ | ||
100 | if (type == TRANS_ATTACH) | ||
101 | return -ENOENT; | ||
102 | |||
90 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); | 103 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); |
91 | if (!cur_trans) | 104 | if (!cur_trans) |
92 | return -ENOMEM; | 105 | return -ENOMEM; |
@@ -267,13 +280,6 @@ static void wait_current_trans(struct btrfs_root *root) | |||
267 | } | 280 | } |
268 | } | 281 | } |
269 | 282 | ||
270 | enum btrfs_trans_type { | ||
271 | TRANS_START, | ||
272 | TRANS_JOIN, | ||
273 | TRANS_USERSPACE, | ||
274 | TRANS_JOIN_NOLOCK, | ||
275 | }; | ||
276 | |||
277 | static int may_wait_transaction(struct btrfs_root *root, int type) | 283 | static int may_wait_transaction(struct btrfs_root *root, int type) |
278 | { | 284 | { |
279 | if (root->fs_info->log_root_recovering) | 285 | if (root->fs_info->log_root_recovering) |
@@ -290,7 +296,8 @@ static int may_wait_transaction(struct btrfs_root *root, int type) | |||
290 | } | 296 | } |
291 | 297 | ||
292 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | 298 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, |
293 | u64 num_items, int type) | 299 | u64 num_items, int type, |
300 | int noflush) | ||
294 | { | 301 | { |
295 | struct btrfs_trans_handle *h; | 302 | struct btrfs_trans_handle *h; |
296 | struct btrfs_transaction *cur_trans; | 303 | struct btrfs_transaction *cur_trans; |
@@ -324,9 +331,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
324 | } | 331 | } |
325 | 332 | ||
326 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | 333 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); |
327 | ret = btrfs_block_rsv_add(root, | 334 | if (noflush) |
328 | &root->fs_info->trans_block_rsv, | 335 | ret = btrfs_block_rsv_add_noflush(root, |
329 | num_bytes); | 336 | &root->fs_info->trans_block_rsv, |
337 | num_bytes); | ||
338 | else | ||
339 | ret = btrfs_block_rsv_add(root, | ||
340 | &root->fs_info->trans_block_rsv, | ||
341 | num_bytes); | ||
330 | if (ret) | 342 | if (ret) |
331 | return ERR_PTR(ret); | 343 | return ERR_PTR(ret); |
332 | } | 344 | } |
@@ -335,19 +347,34 @@ again: | |||
335 | if (!h) | 347 | if (!h) |
336 | return ERR_PTR(-ENOMEM); | 348 | return ERR_PTR(-ENOMEM); |
337 | 349 | ||
338 | sb_start_intwrite(root->fs_info->sb); | 350 | /* |
351 | * If we are JOIN_NOLOCK we're already committing a transaction and | ||
352 | * waiting on this guy, so we don't need to do the sb_start_intwrite | ||
353 | * because we're already holding a ref. We need this because we could | ||
354 | * have raced in and done an fsync() on a file which can kick a commit | ||
355 | * and then we deadlock with somebody doing a freeze. | ||
356 | * | ||
357 | * If we are ATTACH, it means we just want to catch the current | ||
358 | * transaction and commit it, so we needn't do sb_start_intwrite(). | ||
359 | */ | ||
360 | if (type < TRANS_JOIN_NOLOCK) | ||
361 | sb_start_intwrite(root->fs_info->sb); | ||
339 | 362 | ||
340 | if (may_wait_transaction(root, type)) | 363 | if (may_wait_transaction(root, type)) |
341 | wait_current_trans(root); | 364 | wait_current_trans(root); |
342 | 365 | ||
343 | do { | 366 | do { |
344 | ret = join_transaction(root, type == TRANS_JOIN_NOLOCK); | 367 | ret = join_transaction(root, type); |
345 | if (ret == -EBUSY) | 368 | if (ret == -EBUSY) |
346 | wait_current_trans(root); | 369 | wait_current_trans(root); |
347 | } while (ret == -EBUSY); | 370 | } while (ret == -EBUSY); |
348 | 371 | ||
349 | if (ret < 0) { | 372 | if (ret < 0) { |
350 | sb_end_intwrite(root->fs_info->sb); | 373 | /* We must get the transaction if we are JOIN_NOLOCK. */ |
374 | BUG_ON(type == TRANS_JOIN_NOLOCK); | ||
375 | |||
376 | if (type < TRANS_JOIN_NOLOCK) | ||
377 | sb_end_intwrite(root->fs_info->sb); | ||
351 | kmem_cache_free(btrfs_trans_handle_cachep, h); | 378 | kmem_cache_free(btrfs_trans_handle_cachep, h); |
352 | return ERR_PTR(ret); | 379 | return ERR_PTR(ret); |
353 | } | 380 | } |
@@ -367,7 +394,9 @@ again: | |||
367 | h->aborted = 0; | 394 | h->aborted = 0; |
368 | h->qgroup_reserved = qgroup_reserved; | 395 | h->qgroup_reserved = qgroup_reserved; |
369 | h->delayed_ref_elem.seq = 0; | 396 | h->delayed_ref_elem.seq = 0; |
397 | h->type = type; | ||
370 | INIT_LIST_HEAD(&h->qgroup_ref_list); | 398 | INIT_LIST_HEAD(&h->qgroup_ref_list); |
399 | INIT_LIST_HEAD(&h->new_bgs); | ||
371 | 400 | ||
372 | smp_mb(); | 401 | smp_mb(); |
373 | if (cur_trans->blocked && may_wait_transaction(root, type)) { | 402 | if (cur_trans->blocked && may_wait_transaction(root, type)) { |
@@ -393,21 +422,33 @@ got_it: | |||
393 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 422 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
394 | int num_items) | 423 | int num_items) |
395 | { | 424 | { |
396 | return start_transaction(root, num_items, TRANS_START); | 425 | return start_transaction(root, num_items, TRANS_START, 0); |
426 | } | ||
427 | |||
428 | struct btrfs_trans_handle *btrfs_start_transaction_noflush( | ||
429 | struct btrfs_root *root, int num_items) | ||
430 | { | ||
431 | return start_transaction(root, num_items, TRANS_START, 1); | ||
397 | } | 432 | } |
433 | |||
398 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) | 434 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) |
399 | { | 435 | { |
400 | return start_transaction(root, 0, TRANS_JOIN); | 436 | return start_transaction(root, 0, TRANS_JOIN, 0); |
401 | } | 437 | } |
402 | 438 | ||
403 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) | 439 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) |
404 | { | 440 | { |
405 | return start_transaction(root, 0, TRANS_JOIN_NOLOCK); | 441 | return start_transaction(root, 0, TRANS_JOIN_NOLOCK, 0); |
406 | } | 442 | } |
407 | 443 | ||
408 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) | 444 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) |
409 | { | 445 | { |
410 | return start_transaction(root, 0, TRANS_USERSPACE); | 446 | return start_transaction(root, 0, TRANS_USERSPACE, 0); |
447 | } | ||
448 | |||
449 | struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root) | ||
450 | { | ||
451 | return start_transaction(root, 0, TRANS_ATTACH, 0); | ||
411 | } | 452 | } |
412 | 453 | ||
413 | /* wait for a transaction commit to be fully complete */ | 454 | /* wait for a transaction commit to be fully complete */ |
@@ -506,11 +547,12 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
506 | } | 547 | } |
507 | 548 | ||
508 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | 549 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, |
509 | struct btrfs_root *root, int throttle, int lock) | 550 | struct btrfs_root *root, int throttle) |
510 | { | 551 | { |
511 | struct btrfs_transaction *cur_trans = trans->transaction; | 552 | struct btrfs_transaction *cur_trans = trans->transaction; |
512 | struct btrfs_fs_info *info = root->fs_info; | 553 | struct btrfs_fs_info *info = root->fs_info; |
513 | int count = 0; | 554 | int count = 0; |
555 | int lock = (trans->type != TRANS_JOIN_NOLOCK); | ||
514 | int err = 0; | 556 | int err = 0; |
515 | 557 | ||
516 | if (--trans->use_count) { | 558 | if (--trans->use_count) { |
@@ -536,6 +578,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
536 | trans->qgroup_reserved = 0; | 578 | trans->qgroup_reserved = 0; |
537 | } | 579 | } |
538 | 580 | ||
581 | if (!list_empty(&trans->new_bgs)) | ||
582 | btrfs_create_pending_block_groups(trans, root); | ||
583 | |||
539 | while (count < 2) { | 584 | while (count < 2) { |
540 | unsigned long cur = trans->delayed_ref_updates; | 585 | unsigned long cur = trans->delayed_ref_updates; |
541 | trans->delayed_ref_updates = 0; | 586 | trans->delayed_ref_updates = 0; |
@@ -551,7 +596,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
551 | btrfs_trans_release_metadata(trans, root); | 596 | btrfs_trans_release_metadata(trans, root); |
552 | trans->block_rsv = NULL; | 597 | trans->block_rsv = NULL; |
553 | 598 | ||
554 | sb_end_intwrite(root->fs_info->sb); | 599 | if (!list_empty(&trans->new_bgs)) |
600 | btrfs_create_pending_block_groups(trans, root); | ||
555 | 601 | ||
556 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && | 602 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && |
557 | should_end_transaction(trans, root)) { | 603 | should_end_transaction(trans, root)) { |
@@ -573,6 +619,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
573 | } | 619 | } |
574 | } | 620 | } |
575 | 621 | ||
622 | if (trans->type < TRANS_JOIN_NOLOCK) | ||
623 | sb_end_intwrite(root->fs_info->sb); | ||
624 | |||
576 | WARN_ON(cur_trans != info->running_transaction); | 625 | WARN_ON(cur_trans != info->running_transaction); |
577 | WARN_ON(atomic_read(&cur_trans->num_writers) < 1); | 626 | WARN_ON(atomic_read(&cur_trans->num_writers) < 1); |
578 | atomic_dec(&cur_trans->num_writers); | 627 | atomic_dec(&cur_trans->num_writers); |
@@ -604,7 +653,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
604 | { | 653 | { |
605 | int ret; | 654 | int ret; |
606 | 655 | ||
607 | ret = __btrfs_end_transaction(trans, root, 0, 1); | 656 | ret = __btrfs_end_transaction(trans, root, 0); |
608 | if (ret) | 657 | if (ret) |
609 | return ret; | 658 | return ret; |
610 | return 0; | 659 | return 0; |
@@ -615,18 +664,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | |||
615 | { | 664 | { |
616 | int ret; | 665 | int ret; |
617 | 666 | ||
618 | ret = __btrfs_end_transaction(trans, root, 1, 1); | 667 | ret = __btrfs_end_transaction(trans, root, 1); |
619 | if (ret) | ||
620 | return ret; | ||
621 | return 0; | ||
622 | } | ||
623 | |||
624 | int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | ||
625 | struct btrfs_root *root) | ||
626 | { | ||
627 | int ret; | ||
628 | |||
629 | ret = __btrfs_end_transaction(trans, root, 0, 0); | ||
630 | if (ret) | 668 | if (ret) |
631 | return ret; | 669 | return ret; |
632 | return 0; | 670 | return 0; |
@@ -635,7 +673,7 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | |||
635 | int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, | 673 | int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, |
636 | struct btrfs_root *root) | 674 | struct btrfs_root *root) |
637 | { | 675 | { |
638 | return __btrfs_end_transaction(trans, root, 1, 1); | 676 | return __btrfs_end_transaction(trans, root, 1); |
639 | } | 677 | } |
640 | 678 | ||
641 | /* | 679 | /* |
@@ -649,13 +687,15 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
649 | int err = 0; | 687 | int err = 0; |
650 | int werr = 0; | 688 | int werr = 0; |
651 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; | 689 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; |
690 | struct extent_state *cached_state = NULL; | ||
652 | u64 start = 0; | 691 | u64 start = 0; |
653 | u64 end; | 692 | u64 end; |
654 | 693 | ||
655 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 694 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
656 | mark)) { | 695 | mark, &cached_state)) { |
657 | convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, mark, | 696 | convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, |
658 | GFP_NOFS); | 697 | mark, &cached_state, GFP_NOFS); |
698 | cached_state = NULL; | ||
659 | err = filemap_fdatawrite_range(mapping, start, end); | 699 | err = filemap_fdatawrite_range(mapping, start, end); |
660 | if (err) | 700 | if (err) |
661 | werr = err; | 701 | werr = err; |
@@ -679,12 +719,14 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, | |||
679 | int err = 0; | 719 | int err = 0; |
680 | int werr = 0; | 720 | int werr = 0; |
681 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; | 721 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; |
722 | struct extent_state *cached_state = NULL; | ||
682 | u64 start = 0; | 723 | u64 start = 0; |
683 | u64 end; | 724 | u64 end; |
684 | 725 | ||
685 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 726 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
686 | EXTENT_NEED_WAIT)) { | 727 | EXTENT_NEED_WAIT, &cached_state)) { |
687 | clear_extent_bits(dirty_pages, start, end, EXTENT_NEED_WAIT, GFP_NOFS); | 728 | clear_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, |
729 | 0, 0, &cached_state, GFP_NOFS); | ||
688 | err = filemap_fdatawait_range(mapping, start, end); | 730 | err = filemap_fdatawait_range(mapping, start, end); |
689 | if (err) | 731 | if (err) |
690 | werr = err; | 732 | werr = err; |
@@ -955,6 +997,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
955 | struct btrfs_root *parent_root; | 997 | struct btrfs_root *parent_root; |
956 | struct btrfs_block_rsv *rsv; | 998 | struct btrfs_block_rsv *rsv; |
957 | struct inode *parent_inode; | 999 | struct inode *parent_inode; |
1000 | struct btrfs_path *path; | ||
1001 | struct btrfs_dir_item *dir_item; | ||
958 | struct dentry *parent; | 1002 | struct dentry *parent; |
959 | struct dentry *dentry; | 1003 | struct dentry *dentry; |
960 | struct extent_buffer *tmp; | 1004 | struct extent_buffer *tmp; |
@@ -967,18 +1011,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
967 | u64 root_flags; | 1011 | u64 root_flags; |
968 | uuid_le new_uuid; | 1012 | uuid_le new_uuid; |
969 | 1013 | ||
970 | rsv = trans->block_rsv; | 1014 | path = btrfs_alloc_path(); |
1015 | if (!path) { | ||
1016 | ret = pending->error = -ENOMEM; | ||
1017 | goto path_alloc_fail; | ||
1018 | } | ||
971 | 1019 | ||
972 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 1020 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
973 | if (!new_root_item) { | 1021 | if (!new_root_item) { |
974 | ret = pending->error = -ENOMEM; | 1022 | ret = pending->error = -ENOMEM; |
975 | goto fail; | 1023 | goto root_item_alloc_fail; |
976 | } | 1024 | } |
977 | 1025 | ||
978 | ret = btrfs_find_free_objectid(tree_root, &objectid); | 1026 | ret = btrfs_find_free_objectid(tree_root, &objectid); |
979 | if (ret) { | 1027 | if (ret) { |
980 | pending->error = ret; | 1028 | pending->error = ret; |
981 | goto fail; | 1029 | goto no_free_objectid; |
982 | } | 1030 | } |
983 | 1031 | ||
984 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); | 1032 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); |
@@ -988,22 +1036,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
988 | to_reserve); | 1036 | to_reserve); |
989 | if (ret) { | 1037 | if (ret) { |
990 | pending->error = ret; | 1038 | pending->error = ret; |
991 | goto fail; | 1039 | goto no_free_objectid; |
992 | } | 1040 | } |
993 | } | 1041 | } |
994 | 1042 | ||
995 | ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid, | 1043 | ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid, |
996 | objectid, pending->inherit); | 1044 | objectid, pending->inherit); |
997 | kfree(pending->inherit); | ||
998 | if (ret) { | 1045 | if (ret) { |
999 | pending->error = ret; | 1046 | pending->error = ret; |
1000 | goto fail; | 1047 | goto no_free_objectid; |
1001 | } | 1048 | } |
1002 | 1049 | ||
1003 | key.objectid = objectid; | 1050 | key.objectid = objectid; |
1004 | key.offset = (u64)-1; | 1051 | key.offset = (u64)-1; |
1005 | key.type = BTRFS_ROOT_ITEM_KEY; | 1052 | key.type = BTRFS_ROOT_ITEM_KEY; |
1006 | 1053 | ||
1054 | rsv = trans->block_rsv; | ||
1007 | trans->block_rsv = &pending->block_rsv; | 1055 | trans->block_rsv = &pending->block_rsv; |
1008 | 1056 | ||
1009 | dentry = pending->dentry; | 1057 | dentry = pending->dentry; |
@@ -1017,24 +1065,21 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1017 | */ | 1065 | */ |
1018 | ret = btrfs_set_inode_index(parent_inode, &index); | 1066 | ret = btrfs_set_inode_index(parent_inode, &index); |
1019 | BUG_ON(ret); /* -ENOMEM */ | 1067 | BUG_ON(ret); /* -ENOMEM */ |
1020 | ret = btrfs_insert_dir_item(trans, parent_root, | 1068 | |
1021 | dentry->d_name.name, dentry->d_name.len, | 1069 | /* check if there is a file/dir which has the same name. */ |
1022 | parent_inode, &key, | 1070 | dir_item = btrfs_lookup_dir_item(NULL, parent_root, path, |
1023 | BTRFS_FT_DIR, index); | 1071 | btrfs_ino(parent_inode), |
1024 | if (ret == -EEXIST) { | 1072 | dentry->d_name.name, |
1073 | dentry->d_name.len, 0); | ||
1074 | if (dir_item != NULL && !IS_ERR(dir_item)) { | ||
1025 | pending->error = -EEXIST; | 1075 | pending->error = -EEXIST; |
1026 | dput(parent); | ||
1027 | goto fail; | 1076 | goto fail; |
1028 | } else if (ret) { | 1077 | } else if (IS_ERR(dir_item)) { |
1029 | goto abort_trans_dput; | 1078 | ret = PTR_ERR(dir_item); |
1079 | btrfs_abort_transaction(trans, root, ret); | ||
1080 | goto fail; | ||
1030 | } | 1081 | } |
1031 | 1082 | btrfs_release_path(path); | |
1032 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | ||
1033 | dentry->d_name.len * 2); | ||
1034 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | ||
1035 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | ||
1036 | if (ret) | ||
1037 | goto abort_trans_dput; | ||
1038 | 1083 | ||
1039 | /* | 1084 | /* |
1040 | * pull in the delayed directory update | 1085 | * pull in the delayed directory update |
@@ -1043,8 +1088,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1043 | * snapshot | 1088 | * snapshot |
1044 | */ | 1089 | */ |
1045 | ret = btrfs_run_delayed_items(trans, root); | 1090 | ret = btrfs_run_delayed_items(trans, root); |
1046 | if (ret) { /* Transaction aborted */ | 1091 | if (ret) { /* Transaction aborted */ |
1047 | dput(parent); | 1092 | btrfs_abort_transaction(trans, root, ret); |
1048 | goto fail; | 1093 | goto fail; |
1049 | } | 1094 | } |
1050 | 1095 | ||
@@ -1079,7 +1124,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1079 | if (ret) { | 1124 | if (ret) { |
1080 | btrfs_tree_unlock(old); | 1125 | btrfs_tree_unlock(old); |
1081 | free_extent_buffer(old); | 1126 | free_extent_buffer(old); |
1082 | goto abort_trans_dput; | 1127 | btrfs_abort_transaction(trans, root, ret); |
1128 | goto fail; | ||
1083 | } | 1129 | } |
1084 | 1130 | ||
1085 | btrfs_set_lock_blocking(old); | 1131 | btrfs_set_lock_blocking(old); |
@@ -1088,8 +1134,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1088 | /* clean up in any case */ | 1134 | /* clean up in any case */ |
1089 | btrfs_tree_unlock(old); | 1135 | btrfs_tree_unlock(old); |
1090 | free_extent_buffer(old); | 1136 | free_extent_buffer(old); |
1091 | if (ret) | 1137 | if (ret) { |
1092 | goto abort_trans_dput; | 1138 | btrfs_abort_transaction(trans, root, ret); |
1139 | goto fail; | ||
1140 | } | ||
1093 | 1141 | ||
1094 | /* see comments in should_cow_block() */ | 1142 | /* see comments in should_cow_block() */ |
1095 | root->force_cow = 1; | 1143 | root->force_cow = 1; |
@@ -1101,8 +1149,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1101 | ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); | 1149 | ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); |
1102 | btrfs_tree_unlock(tmp); | 1150 | btrfs_tree_unlock(tmp); |
1103 | free_extent_buffer(tmp); | 1151 | free_extent_buffer(tmp); |
1104 | if (ret) | 1152 | if (ret) { |
1105 | goto abort_trans_dput; | 1153 | btrfs_abort_transaction(trans, root, ret); |
1154 | goto fail; | ||
1155 | } | ||
1106 | 1156 | ||
1107 | /* | 1157 | /* |
1108 | * insert root back/forward references | 1158 | * insert root back/forward references |
@@ -1111,32 +1161,58 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1111 | parent_root->root_key.objectid, | 1161 | parent_root->root_key.objectid, |
1112 | btrfs_ino(parent_inode), index, | 1162 | btrfs_ino(parent_inode), index, |
1113 | dentry->d_name.name, dentry->d_name.len); | 1163 | dentry->d_name.name, dentry->d_name.len); |
1114 | dput(parent); | 1164 | if (ret) { |
1115 | if (ret) | 1165 | btrfs_abort_transaction(trans, root, ret); |
1116 | goto fail; | 1166 | goto fail; |
1167 | } | ||
1117 | 1168 | ||
1118 | key.offset = (u64)-1; | 1169 | key.offset = (u64)-1; |
1119 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); | 1170 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); |
1120 | if (IS_ERR(pending->snap)) { | 1171 | if (IS_ERR(pending->snap)) { |
1121 | ret = PTR_ERR(pending->snap); | 1172 | ret = PTR_ERR(pending->snap); |
1122 | goto abort_trans; | 1173 | btrfs_abort_transaction(trans, root, ret); |
1174 | goto fail; | ||
1123 | } | 1175 | } |
1124 | 1176 | ||
1125 | ret = btrfs_reloc_post_snapshot(trans, pending); | 1177 | ret = btrfs_reloc_post_snapshot(trans, pending); |
1178 | if (ret) { | ||
1179 | btrfs_abort_transaction(trans, root, ret); | ||
1180 | goto fail; | ||
1181 | } | ||
1182 | |||
1183 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
1184 | if (ret) { | ||
1185 | btrfs_abort_transaction(trans, root, ret); | ||
1186 | goto fail; | ||
1187 | } | ||
1188 | |||
1189 | ret = btrfs_insert_dir_item(trans, parent_root, | ||
1190 | dentry->d_name.name, dentry->d_name.len, | ||
1191 | parent_inode, &key, | ||
1192 | BTRFS_FT_DIR, index); | ||
1193 | /* We have checked the name at the beginning, so it is impossible. */ | ||
1194 | BUG_ON(ret == -EEXIST); | ||
1195 | if (ret) { | ||
1196 | btrfs_abort_transaction(trans, root, ret); | ||
1197 | goto fail; | ||
1198 | } | ||
1199 | |||
1200 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | ||
1201 | dentry->d_name.len * 2); | ||
1202 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | ||
1203 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | ||
1126 | if (ret) | 1204 | if (ret) |
1127 | goto abort_trans; | 1205 | btrfs_abort_transaction(trans, root, ret); |
1128 | ret = 0; | ||
1129 | fail: | 1206 | fail: |
1130 | kfree(new_root_item); | 1207 | dput(parent); |
1131 | trans->block_rsv = rsv; | 1208 | trans->block_rsv = rsv; |
1209 | no_free_objectid: | ||
1210 | kfree(new_root_item); | ||
1211 | root_item_alloc_fail: | ||
1212 | btrfs_free_path(path); | ||
1213 | path_alloc_fail: | ||
1132 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); | 1214 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); |
1133 | return ret; | 1215 | return ret; |
1134 | |||
1135 | abort_trans_dput: | ||
1136 | dput(parent); | ||
1137 | abort_trans: | ||
1138 | btrfs_abort_transaction(trans, root, ret); | ||
1139 | goto fail; | ||
1140 | } | 1216 | } |
1141 | 1217 | ||
1142 | /* | 1218 | /* |
@@ -1229,6 +1305,16 @@ static void do_async_commit(struct work_struct *work) | |||
1229 | struct btrfs_async_commit *ac = | 1305 | struct btrfs_async_commit *ac = |
1230 | container_of(work, struct btrfs_async_commit, work.work); | 1306 | container_of(work, struct btrfs_async_commit, work.work); |
1231 | 1307 | ||
1308 | /* | ||
1309 | * We've got freeze protection passed with the transaction. | ||
1310 | * Tell lockdep about it. | ||
1311 | */ | ||
1312 | rwsem_acquire_read( | ||
1313 | &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
1314 | 0, 1, _THIS_IP_); | ||
1315 | |||
1316 | current->journal_info = ac->newtrans; | ||
1317 | |||
1232 | btrfs_commit_transaction(ac->newtrans, ac->root); | 1318 | btrfs_commit_transaction(ac->newtrans, ac->root); |
1233 | kfree(ac); | 1319 | kfree(ac); |
1234 | } | 1320 | } |
@@ -1258,6 +1344,14 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
1258 | atomic_inc(&cur_trans->use_count); | 1344 | atomic_inc(&cur_trans->use_count); |
1259 | 1345 | ||
1260 | btrfs_end_transaction(trans, root); | 1346 | btrfs_end_transaction(trans, root); |
1347 | |||
1348 | /* | ||
1349 | * Tell lockdep we've released the freeze rwsem, since the | ||
1350 | * async commit thread will be the one to unlock it. | ||
1351 | */ | ||
1352 | rwsem_release(&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
1353 | 1, _THIS_IP_); | ||
1354 | |||
1261 | schedule_delayed_work(&ac->work, 0); | 1355 | schedule_delayed_work(&ac->work, 0); |
1262 | 1356 | ||
1263 | /* wait for transaction to start and unblock */ | 1357 | /* wait for transaction to start and unblock */ |
@@ -1348,6 +1442,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1348 | */ | 1442 | */ |
1349 | cur_trans->delayed_refs.flushing = 1; | 1443 | cur_trans->delayed_refs.flushing = 1; |
1350 | 1444 | ||
1445 | if (!list_empty(&trans->new_bgs)) | ||
1446 | btrfs_create_pending_block_groups(trans, root); | ||
1447 | |||
1351 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1448 | ret = btrfs_run_delayed_refs(trans, root, 0); |
1352 | if (ret) | 1449 | if (ret) |
1353 | goto cleanup_transaction; | 1450 | goto cleanup_transaction; |
@@ -1403,7 +1500,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1403 | 1500 | ||
1404 | if (flush_on_commit || snap_pending) { | 1501 | if (flush_on_commit || snap_pending) { |
1405 | btrfs_start_delalloc_inodes(root, 1); | 1502 | btrfs_start_delalloc_inodes(root, 1); |
1406 | btrfs_wait_ordered_extents(root, 0, 1); | 1503 | btrfs_wait_ordered_extents(root, 1); |
1407 | } | 1504 | } |
1408 | 1505 | ||
1409 | ret = btrfs_run_delayed_items(trans, root); | 1506 | ret = btrfs_run_delayed_items(trans, root); |
@@ -1456,13 +1553,28 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1456 | */ | 1553 | */ |
1457 | mutex_lock(&root->fs_info->reloc_mutex); | 1554 | mutex_lock(&root->fs_info->reloc_mutex); |
1458 | 1555 | ||
1459 | ret = btrfs_run_delayed_items(trans, root); | 1556 | /* |
1557 | * We needn't worry about the delayed items because we will | ||
1558 | * deal with them in create_pending_snapshot(), which is the | ||
1559 | * core function of the snapshot creation. | ||
1560 | */ | ||
1561 | ret = create_pending_snapshots(trans, root->fs_info); | ||
1460 | if (ret) { | 1562 | if (ret) { |
1461 | mutex_unlock(&root->fs_info->reloc_mutex); | 1563 | mutex_unlock(&root->fs_info->reloc_mutex); |
1462 | goto cleanup_transaction; | 1564 | goto cleanup_transaction; |
1463 | } | 1565 | } |
1464 | 1566 | ||
1465 | ret = create_pending_snapshots(trans, root->fs_info); | 1567 | /* |
1568 | * We insert the dir indexes of the snapshots and update the inode | ||
1569 | * of the snapshots' parents after the snapshot creation, so there | ||
1570 | * are some delayed items which are not dealt with. Now deal with | ||
1571 | * them. | ||
1572 | * | ||
1573 | * We needn't worry that this operation will corrupt the snapshots, | ||
1574 | * because all the trees which are snapshotted will be forced to COW | ||
1575 | * the nodes and leaves. | ||
1576 | */ | ||
1577 | ret = btrfs_run_delayed_items(trans, root); | ||
1466 | if (ret) { | 1578 | if (ret) { |
1467 | mutex_unlock(&root->fs_info->reloc_mutex); | 1579 | mutex_unlock(&root->fs_info->reloc_mutex); |
1468 | goto cleanup_transaction; | 1580 | goto cleanup_transaction; |
@@ -1584,7 +1696,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1584 | put_transaction(cur_trans); | 1696 | put_transaction(cur_trans); |
1585 | put_transaction(cur_trans); | 1697 | put_transaction(cur_trans); |
1586 | 1698 | ||
1587 | sb_end_intwrite(root->fs_info->sb); | 1699 | if (trans->type < TRANS_JOIN_NOLOCK) |
1700 | sb_end_intwrite(root->fs_info->sb); | ||
1588 | 1701 | ||
1589 | trace_btrfs_transaction_commit(root); | 1702 | trace_btrfs_transaction_commit(root); |
1590 | 1703 | ||
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index e8b8416c688b..80961947a6b2 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -47,6 +47,14 @@ struct btrfs_transaction { | |||
47 | int aborted; | 47 | int aborted; |
48 | }; | 48 | }; |
49 | 49 | ||
50 | enum btrfs_trans_type { | ||
51 | TRANS_START, | ||
52 | TRANS_JOIN, | ||
53 | TRANS_USERSPACE, | ||
54 | TRANS_JOIN_NOLOCK, | ||
55 | TRANS_ATTACH, | ||
56 | }; | ||
57 | |||
50 | struct btrfs_trans_handle { | 58 | struct btrfs_trans_handle { |
51 | u64 transid; | 59 | u64 transid; |
52 | u64 bytes_reserved; | 60 | u64 bytes_reserved; |
@@ -58,8 +66,9 @@ struct btrfs_trans_handle { | |||
58 | struct btrfs_transaction *transaction; | 66 | struct btrfs_transaction *transaction; |
59 | struct btrfs_block_rsv *block_rsv; | 67 | struct btrfs_block_rsv *block_rsv; |
60 | struct btrfs_block_rsv *orig_rsv; | 68 | struct btrfs_block_rsv *orig_rsv; |
61 | int aborted; | 69 | short aborted; |
62 | int adding_csums; | 70 | short adding_csums; |
71 | enum btrfs_trans_type type; | ||
63 | /* | 72 | /* |
64 | * this root is only needed to validate that the root passed to | 73 | * this root is only needed to validate that the root passed to |
65 | * start_transaction is the same as the one passed to end_transaction. | 74 | * start_transaction is the same as the one passed to end_transaction. |
@@ -68,6 +77,7 @@ struct btrfs_trans_handle { | |||
68 | struct btrfs_root *root; | 77 | struct btrfs_root *root; |
69 | struct seq_list delayed_ref_elem; | 78 | struct seq_list delayed_ref_elem; |
70 | struct list_head qgroup_ref_list; | 79 | struct list_head qgroup_ref_list; |
80 | struct list_head new_bgs; | ||
71 | }; | 81 | }; |
72 | 82 | ||
73 | struct btrfs_pending_snapshot { | 83 | struct btrfs_pending_snapshot { |
@@ -88,16 +98,18 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | |||
88 | { | 98 | { |
89 | BTRFS_I(inode)->last_trans = trans->transaction->transid; | 99 | BTRFS_I(inode)->last_trans = trans->transaction->transid; |
90 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; | 100 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; |
101 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; | ||
91 | } | 102 | } |
92 | 103 | ||
93 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 104 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
94 | struct btrfs_root *root); | 105 | struct btrfs_root *root); |
95 | int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | ||
96 | struct btrfs_root *root); | ||
97 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 106 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
98 | int num_items); | 107 | int num_items); |
108 | struct btrfs_trans_handle *btrfs_start_transaction_noflush( | ||
109 | struct btrfs_root *root, int num_items); | ||
99 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); | 110 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); |
100 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); | 111 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); |
112 | struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root); | ||
101 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); | 113 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); |
102 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); | 114 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); |
103 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 115 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c86670f4f285..81e407d9677a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -18,13 +18,16 @@ | |||
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/list_sort.h> | ||
21 | #include "ctree.h" | 22 | #include "ctree.h" |
22 | #include "transaction.h" | 23 | #include "transaction.h" |
23 | #include "disk-io.h" | 24 | #include "disk-io.h" |
24 | #include "locking.h" | 25 | #include "locking.h" |
25 | #include "print-tree.h" | 26 | #include "print-tree.h" |
27 | #include "backref.h" | ||
26 | #include "compat.h" | 28 | #include "compat.h" |
27 | #include "tree-log.h" | 29 | #include "tree-log.h" |
30 | #include "hash.h" | ||
28 | 31 | ||
29 | /* magic values for the inode_only field in btrfs_log_inode: | 32 | /* magic values for the inode_only field in btrfs_log_inode: |
30 | * | 33 | * |
@@ -146,7 +149,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
146 | root->log_multiple_pids = true; | 149 | root->log_multiple_pids = true; |
147 | } | 150 | } |
148 | 151 | ||
149 | root->log_batch++; | 152 | atomic_inc(&root->log_batch); |
150 | atomic_inc(&root->log_writers); | 153 | atomic_inc(&root->log_writers); |
151 | mutex_unlock(&root->log_mutex); | 154 | mutex_unlock(&root->log_mutex); |
152 | return 0; | 155 | return 0; |
@@ -165,7 +168,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
165 | err = ret; | 168 | err = ret; |
166 | } | 169 | } |
167 | mutex_unlock(&root->fs_info->tree_log_mutex); | 170 | mutex_unlock(&root->fs_info->tree_log_mutex); |
168 | root->log_batch++; | 171 | atomic_inc(&root->log_batch); |
169 | atomic_inc(&root->log_writers); | 172 | atomic_inc(&root->log_writers); |
170 | mutex_unlock(&root->log_mutex); | 173 | mutex_unlock(&root->log_mutex); |
171 | return err; | 174 | return err; |
@@ -484,7 +487,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
484 | int found_type; | 487 | int found_type; |
485 | u64 mask = root->sectorsize - 1; | 488 | u64 mask = root->sectorsize - 1; |
486 | u64 extent_end; | 489 | u64 extent_end; |
487 | u64 alloc_hint; | ||
488 | u64 start = key->offset; | 490 | u64 start = key->offset; |
489 | u64 saved_nbytes; | 491 | u64 saved_nbytes; |
490 | struct btrfs_file_extent_item *item; | 492 | struct btrfs_file_extent_item *item; |
@@ -550,8 +552,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
550 | 552 | ||
551 | saved_nbytes = inode_get_bytes(inode); | 553 | saved_nbytes = inode_get_bytes(inode); |
552 | /* drop any overlapping extents */ | 554 | /* drop any overlapping extents */ |
553 | ret = btrfs_drop_extents(trans, inode, start, extent_end, | 555 | ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); |
554 | &alloc_hint, 1); | ||
555 | BUG_ON(ret); | 556 | BUG_ON(ret); |
556 | 557 | ||
557 | if (found_type == BTRFS_FILE_EXTENT_REG || | 558 | if (found_type == BTRFS_FILE_EXTENT_REG || |
@@ -744,6 +745,7 @@ out: | |||
744 | */ | 745 | */ |
745 | static noinline int backref_in_log(struct btrfs_root *log, | 746 | static noinline int backref_in_log(struct btrfs_root *log, |
746 | struct btrfs_key *key, | 747 | struct btrfs_key *key, |
748 | u64 ref_objectid, | ||
747 | char *name, int namelen) | 749 | char *name, int namelen) |
748 | { | 750 | { |
749 | struct btrfs_path *path; | 751 | struct btrfs_path *path; |
@@ -764,8 +766,17 @@ static noinline int backref_in_log(struct btrfs_root *log, | |||
764 | if (ret != 0) | 766 | if (ret != 0) |
765 | goto out; | 767 | goto out; |
766 | 768 | ||
767 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | ||
768 | ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); | 769 | ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); |
770 | |||
771 | if (key->type == BTRFS_INODE_EXTREF_KEY) { | ||
772 | if (btrfs_find_name_in_ext_backref(path, ref_objectid, | ||
773 | name, namelen, NULL)) | ||
774 | match = 1; | ||
775 | |||
776 | goto out; | ||
777 | } | ||
778 | |||
779 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | ||
769 | ptr_end = ptr + item_size; | 780 | ptr_end = ptr + item_size; |
770 | while (ptr < ptr_end) { | 781 | while (ptr < ptr_end) { |
771 | ref = (struct btrfs_inode_ref *)ptr; | 782 | ref = (struct btrfs_inode_ref *)ptr; |
@@ -786,91 +797,42 @@ out: | |||
786 | return match; | 797 | return match; |
787 | } | 798 | } |
788 | 799 | ||
789 | 800 | static inline int __add_inode_ref(struct btrfs_trans_handle *trans, | |
790 | /* | ||
791 | * replay one inode back reference item found in the log tree. | ||
792 | * eb, slot and key refer to the buffer and key found in the log tree. | ||
793 | * root is the destination we are replaying into, and path is for temp | ||
794 | * use by this function. (it should be released on return). | ||
795 | */ | ||
796 | static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | ||
797 | struct btrfs_root *root, | 801 | struct btrfs_root *root, |
798 | struct btrfs_root *log, | ||
799 | struct btrfs_path *path, | 802 | struct btrfs_path *path, |
800 | struct extent_buffer *eb, int slot, | 803 | struct btrfs_root *log_root, |
801 | struct btrfs_key *key) | 804 | struct inode *dir, struct inode *inode, |
805 | struct extent_buffer *eb, | ||
806 | u64 inode_objectid, u64 parent_objectid, | ||
807 | u64 ref_index, char *name, int namelen, | ||
808 | int *search_done) | ||
802 | { | 809 | { |
803 | struct btrfs_inode_ref *ref; | ||
804 | struct btrfs_dir_item *di; | ||
805 | struct inode *dir; | ||
806 | struct inode *inode; | ||
807 | unsigned long ref_ptr; | ||
808 | unsigned long ref_end; | ||
809 | char *name; | ||
810 | int namelen; | ||
811 | int ret; | 810 | int ret; |
812 | int search_done = 0; | 811 | char *victim_name; |
813 | 812 | int victim_name_len; | |
814 | /* | 813 | struct extent_buffer *leaf; |
815 | * it is possible that we didn't log all the parent directories | 814 | struct btrfs_dir_item *di; |
816 | * for a given inode. If we don't find the dir, just don't | 815 | struct btrfs_key search_key; |
817 | * copy the back ref in. The link count fixup code will take | 816 | struct btrfs_inode_extref *extref; |
818 | * care of the rest | ||
819 | */ | ||
820 | dir = read_one_inode(root, key->offset); | ||
821 | if (!dir) | ||
822 | return -ENOENT; | ||
823 | |||
824 | inode = read_one_inode(root, key->objectid); | ||
825 | if (!inode) { | ||
826 | iput(dir); | ||
827 | return -EIO; | ||
828 | } | ||
829 | |||
830 | ref_ptr = btrfs_item_ptr_offset(eb, slot); | ||
831 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); | ||
832 | 817 | ||
833 | again: | 818 | again: |
834 | ref = (struct btrfs_inode_ref *)ref_ptr; | 819 | /* Search old style refs */ |
835 | 820 | search_key.objectid = inode_objectid; | |
836 | namelen = btrfs_inode_ref_name_len(eb, ref); | 821 | search_key.type = BTRFS_INODE_REF_KEY; |
837 | name = kmalloc(namelen, GFP_NOFS); | 822 | search_key.offset = parent_objectid; |
838 | BUG_ON(!name); | 823 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); |
839 | |||
840 | read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); | ||
841 | |||
842 | /* if we already have a perfect match, we're done */ | ||
843 | if (inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), | ||
844 | btrfs_inode_ref_index(eb, ref), | ||
845 | name, namelen)) { | ||
846 | goto out; | ||
847 | } | ||
848 | |||
849 | /* | ||
850 | * look for a conflicting back reference in the metadata. | ||
851 | * if we find one we have to unlink that name of the file | ||
852 | * before we add our new link. Later on, we overwrite any | ||
853 | * existing back reference, and we don't want to create | ||
854 | * dangling pointers in the directory. | ||
855 | */ | ||
856 | |||
857 | if (search_done) | ||
858 | goto insert; | ||
859 | |||
860 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); | ||
861 | if (ret == 0) { | 824 | if (ret == 0) { |
862 | char *victim_name; | ||
863 | int victim_name_len; | ||
864 | struct btrfs_inode_ref *victim_ref; | 825 | struct btrfs_inode_ref *victim_ref; |
865 | unsigned long ptr; | 826 | unsigned long ptr; |
866 | unsigned long ptr_end; | 827 | unsigned long ptr_end; |
867 | struct extent_buffer *leaf = path->nodes[0]; | 828 | |
829 | leaf = path->nodes[0]; | ||
868 | 830 | ||
869 | /* are we trying to overwrite a back ref for the root directory | 831 | /* are we trying to overwrite a back ref for the root directory |
870 | * if so, just jump out, we're done | 832 | * if so, just jump out, we're done |
871 | */ | 833 | */ |
872 | if (key->objectid == key->offset) | 834 | if (search_key.objectid == search_key.offset) |
873 | goto out_nowrite; | 835 | return 1; |
874 | 836 | ||
875 | /* check all the names in this back reference to see | 837 | /* check all the names in this back reference to see |
876 | * if they are in the log. if so, we allow them to stay | 838 | * if they are in the log. if so, we allow them to stay |
@@ -889,7 +851,9 @@ again: | |||
889 | (unsigned long)(victim_ref + 1), | 851 | (unsigned long)(victim_ref + 1), |
890 | victim_name_len); | 852 | victim_name_len); |
891 | 853 | ||
892 | if (!backref_in_log(log, key, victim_name, | 854 | if (!backref_in_log(log_root, &search_key, |
855 | parent_objectid, | ||
856 | victim_name, | ||
893 | victim_name_len)) { | 857 | victim_name_len)) { |
894 | btrfs_inc_nlink(inode); | 858 | btrfs_inc_nlink(inode); |
895 | btrfs_release_path(path); | 859 | btrfs_release_path(path); |
@@ -897,9 +861,14 @@ again: | |||
897 | ret = btrfs_unlink_inode(trans, root, dir, | 861 | ret = btrfs_unlink_inode(trans, root, dir, |
898 | inode, victim_name, | 862 | inode, victim_name, |
899 | victim_name_len); | 863 | victim_name_len); |
864 | BUG_ON(ret); | ||
900 | btrfs_run_delayed_items(trans, root); | 865 | btrfs_run_delayed_items(trans, root); |
866 | kfree(victim_name); | ||
867 | *search_done = 1; | ||
868 | goto again; | ||
901 | } | 869 | } |
902 | kfree(victim_name); | 870 | kfree(victim_name); |
871 | |||
903 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; | 872 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; |
904 | } | 873 | } |
905 | BUG_ON(ret); | 874 | BUG_ON(ret); |
@@ -908,14 +877,78 @@ again: | |||
908 | * NOTE: we have searched root tree and checked the | 877 | * NOTE: we have searched root tree and checked the |
909 | * coresponding ref, it does not need to check again. | 878 | * coresponding ref, it does not need to check again. |
910 | */ | 879 | */ |
911 | search_done = 1; | 880 | *search_done = 1; |
881 | } | ||
882 | btrfs_release_path(path); | ||
883 | |||
884 | /* Same search but for extended refs */ | ||
885 | extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen, | ||
886 | inode_objectid, parent_objectid, 0, | ||
887 | 0); | ||
888 | if (!IS_ERR_OR_NULL(extref)) { | ||
889 | u32 item_size; | ||
890 | u32 cur_offset = 0; | ||
891 | unsigned long base; | ||
892 | struct inode *victim_parent; | ||
893 | |||
894 | leaf = path->nodes[0]; | ||
895 | |||
896 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
897 | base = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
898 | |||
899 | while (cur_offset < item_size) { | ||
900 | extref = (struct btrfs_inode_extref *)base + cur_offset; | ||
901 | |||
902 | victim_name_len = btrfs_inode_extref_name_len(leaf, extref); | ||
903 | |||
904 | if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) | ||
905 | goto next; | ||
906 | |||
907 | victim_name = kmalloc(victim_name_len, GFP_NOFS); | ||
908 | read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name, | ||
909 | victim_name_len); | ||
910 | |||
911 | search_key.objectid = inode_objectid; | ||
912 | search_key.type = BTRFS_INODE_EXTREF_KEY; | ||
913 | search_key.offset = btrfs_extref_hash(parent_objectid, | ||
914 | victim_name, | ||
915 | victim_name_len); | ||
916 | ret = 0; | ||
917 | if (!backref_in_log(log_root, &search_key, | ||
918 | parent_objectid, victim_name, | ||
919 | victim_name_len)) { | ||
920 | ret = -ENOENT; | ||
921 | victim_parent = read_one_inode(root, | ||
922 | parent_objectid); | ||
923 | if (victim_parent) { | ||
924 | btrfs_inc_nlink(inode); | ||
925 | btrfs_release_path(path); | ||
926 | |||
927 | ret = btrfs_unlink_inode(trans, root, | ||
928 | victim_parent, | ||
929 | inode, | ||
930 | victim_name, | ||
931 | victim_name_len); | ||
932 | btrfs_run_delayed_items(trans, root); | ||
933 | } | ||
934 | BUG_ON(ret); | ||
935 | iput(victim_parent); | ||
936 | kfree(victim_name); | ||
937 | *search_done = 1; | ||
938 | goto again; | ||
939 | } | ||
940 | kfree(victim_name); | ||
941 | BUG_ON(ret); | ||
942 | next: | ||
943 | cur_offset += victim_name_len + sizeof(*extref); | ||
944 | } | ||
945 | *search_done = 1; | ||
912 | } | 946 | } |
913 | btrfs_release_path(path); | 947 | btrfs_release_path(path); |
914 | 948 | ||
915 | /* look for a conflicting sequence number */ | 949 | /* look for a conflicting sequence number */ |
916 | di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), | 950 | di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), |
917 | btrfs_inode_ref_index(eb, ref), | 951 | ref_index, name, namelen, 0); |
918 | name, namelen, 0); | ||
919 | if (di && !IS_ERR(di)) { | 952 | if (di && !IS_ERR(di)) { |
920 | ret = drop_one_dir_item(trans, root, path, dir, di); | 953 | ret = drop_one_dir_item(trans, root, path, dir, di); |
921 | BUG_ON(ret); | 954 | BUG_ON(ret); |
@@ -931,25 +964,173 @@ again: | |||
931 | } | 964 | } |
932 | btrfs_release_path(path); | 965 | btrfs_release_path(path); |
933 | 966 | ||
934 | insert: | 967 | return 0; |
935 | /* insert our name */ | 968 | } |
936 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, | ||
937 | btrfs_inode_ref_index(eb, ref)); | ||
938 | BUG_ON(ret); | ||
939 | 969 | ||
940 | btrfs_update_inode(trans, root, inode); | 970 | static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, |
971 | u32 *namelen, char **name, u64 *index, | ||
972 | u64 *parent_objectid) | ||
973 | { | ||
974 | struct btrfs_inode_extref *extref; | ||
941 | 975 | ||
942 | out: | 976 | extref = (struct btrfs_inode_extref *)ref_ptr; |
943 | ref_ptr = (unsigned long)(ref + 1) + namelen; | 977 | |
944 | kfree(name); | 978 | *namelen = btrfs_inode_extref_name_len(eb, extref); |
945 | if (ref_ptr < ref_end) | 979 | *name = kmalloc(*namelen, GFP_NOFS); |
946 | goto again; | 980 | if (*name == NULL) |
981 | return -ENOMEM; | ||
982 | |||
983 | read_extent_buffer(eb, *name, (unsigned long)&extref->name, | ||
984 | *namelen); | ||
985 | |||
986 | *index = btrfs_inode_extref_index(eb, extref); | ||
987 | if (parent_objectid) | ||
988 | *parent_objectid = btrfs_inode_extref_parent(eb, extref); | ||
989 | |||
990 | return 0; | ||
991 | } | ||
992 | |||
993 | static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, | ||
994 | u32 *namelen, char **name, u64 *index) | ||
995 | { | ||
996 | struct btrfs_inode_ref *ref; | ||
997 | |||
998 | ref = (struct btrfs_inode_ref *)ref_ptr; | ||
999 | |||
1000 | *namelen = btrfs_inode_ref_name_len(eb, ref); | ||
1001 | *name = kmalloc(*namelen, GFP_NOFS); | ||
1002 | if (*name == NULL) | ||
1003 | return -ENOMEM; | ||
1004 | |||
1005 | read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen); | ||
1006 | |||
1007 | *index = btrfs_inode_ref_index(eb, ref); | ||
1008 | |||
1009 | return 0; | ||
1010 | } | ||
1011 | |||
1012 | /* | ||
1013 | * replay one inode back reference item found in the log tree. | ||
1014 | * eb, slot and key refer to the buffer and key found in the log tree. | ||
1015 | * root is the destination we are replaying into, and path is for temp | ||
1016 | * use by this function. (it should be released on return). | ||
1017 | */ | ||
1018 | static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | ||
1019 | struct btrfs_root *root, | ||
1020 | struct btrfs_root *log, | ||
1021 | struct btrfs_path *path, | ||
1022 | struct extent_buffer *eb, int slot, | ||
1023 | struct btrfs_key *key) | ||
1024 | { | ||
1025 | struct inode *dir; | ||
1026 | struct inode *inode; | ||
1027 | unsigned long ref_ptr; | ||
1028 | unsigned long ref_end; | ||
1029 | char *name; | ||
1030 | int namelen; | ||
1031 | int ret; | ||
1032 | int search_done = 0; | ||
1033 | int log_ref_ver = 0; | ||
1034 | u64 parent_objectid; | ||
1035 | u64 inode_objectid; | ||
1036 | u64 ref_index = 0; | ||
1037 | int ref_struct_size; | ||
1038 | |||
1039 | ref_ptr = btrfs_item_ptr_offset(eb, slot); | ||
1040 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); | ||
1041 | |||
1042 | if (key->type == BTRFS_INODE_EXTREF_KEY) { | ||
1043 | struct btrfs_inode_extref *r; | ||
1044 | |||
1045 | ref_struct_size = sizeof(struct btrfs_inode_extref); | ||
1046 | log_ref_ver = 1; | ||
1047 | r = (struct btrfs_inode_extref *)ref_ptr; | ||
1048 | parent_objectid = btrfs_inode_extref_parent(eb, r); | ||
1049 | } else { | ||
1050 | ref_struct_size = sizeof(struct btrfs_inode_ref); | ||
1051 | parent_objectid = key->offset; | ||
1052 | } | ||
1053 | inode_objectid = key->objectid; | ||
1054 | |||
1055 | /* | ||
1056 | * it is possible that we didn't log all the parent directories | ||
1057 | * for a given inode. If we don't find the dir, just don't | ||
1058 | * copy the back ref in. The link count fixup code will take | ||
1059 | * care of the rest | ||
1060 | */ | ||
1061 | dir = read_one_inode(root, parent_objectid); | ||
1062 | if (!dir) | ||
1063 | return -ENOENT; | ||
1064 | |||
1065 | inode = read_one_inode(root, inode_objectid); | ||
1066 | if (!inode) { | ||
1067 | iput(dir); | ||
1068 | return -EIO; | ||
1069 | } | ||
1070 | |||
1071 | while (ref_ptr < ref_end) { | ||
1072 | if (log_ref_ver) { | ||
1073 | ret = extref_get_fields(eb, ref_ptr, &namelen, &name, | ||
1074 | &ref_index, &parent_objectid); | ||
1075 | /* | ||
1076 | * parent object can change from one array | ||
1077 | * item to another. | ||
1078 | */ | ||
1079 | if (!dir) | ||
1080 | dir = read_one_inode(root, parent_objectid); | ||
1081 | if (!dir) | ||
1082 | return -ENOENT; | ||
1083 | } else { | ||
1084 | ret = ref_get_fields(eb, ref_ptr, &namelen, &name, | ||
1085 | &ref_index); | ||
1086 | } | ||
1087 | if (ret) | ||
1088 | return ret; | ||
1089 | |||
1090 | /* if we already have a perfect match, we're done */ | ||
1091 | if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), | ||
1092 | ref_index, name, namelen)) { | ||
1093 | /* | ||
1094 | * look for a conflicting back reference in the | ||
1095 | * metadata. if we find one we have to unlink that name | ||
1096 | * of the file before we add our new link. Later on, we | ||
1097 | * overwrite any existing back reference, and we don't | ||
1098 | * want to create dangling pointers in the directory. | ||
1099 | */ | ||
1100 | |||
1101 | if (!search_done) { | ||
1102 | ret = __add_inode_ref(trans, root, path, log, | ||
1103 | dir, inode, eb, | ||
1104 | inode_objectid, | ||
1105 | parent_objectid, | ||
1106 | ref_index, name, namelen, | ||
1107 | &search_done); | ||
1108 | if (ret == 1) | ||
1109 | goto out; | ||
1110 | BUG_ON(ret); | ||
1111 | } | ||
1112 | |||
1113 | /* insert our name */ | ||
1114 | ret = btrfs_add_link(trans, dir, inode, name, namelen, | ||
1115 | 0, ref_index); | ||
1116 | BUG_ON(ret); | ||
1117 | |||
1118 | btrfs_update_inode(trans, root, inode); | ||
1119 | } | ||
1120 | |||
1121 | ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; | ||
1122 | kfree(name); | ||
1123 | if (log_ref_ver) { | ||
1124 | iput(dir); | ||
1125 | dir = NULL; | ||
1126 | } | ||
1127 | } | ||
947 | 1128 | ||
948 | /* finally write the back reference in the inode */ | 1129 | /* finally write the back reference in the inode */ |
949 | ret = overwrite_item(trans, root, path, eb, slot, key); | 1130 | ret = overwrite_item(trans, root, path, eb, slot, key); |
950 | BUG_ON(ret); | 1131 | BUG_ON(ret); |
951 | 1132 | ||
952 | out_nowrite: | 1133 | out: |
953 | btrfs_release_path(path); | 1134 | btrfs_release_path(path); |
954 | iput(dir); | 1135 | iput(dir); |
955 | iput(inode); | 1136 | iput(inode); |
@@ -966,25 +1147,55 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans, | |||
966 | return ret; | 1147 | return ret; |
967 | } | 1148 | } |
968 | 1149 | ||
1150 | static int count_inode_extrefs(struct btrfs_root *root, | ||
1151 | struct inode *inode, struct btrfs_path *path) | ||
1152 | { | ||
1153 | int ret = 0; | ||
1154 | int name_len; | ||
1155 | unsigned int nlink = 0; | ||
1156 | u32 item_size; | ||
1157 | u32 cur_offset = 0; | ||
1158 | u64 inode_objectid = btrfs_ino(inode); | ||
1159 | u64 offset = 0; | ||
1160 | unsigned long ptr; | ||
1161 | struct btrfs_inode_extref *extref; | ||
1162 | struct extent_buffer *leaf; | ||
969 | 1163 | ||
970 | /* | 1164 | while (1) { |
971 | * There are a few corners where the link count of the file can't | 1165 | ret = btrfs_find_one_extref(root, inode_objectid, offset, path, |
972 | * be properly maintained during replay. So, instead of adding | 1166 | &extref, &offset); |
973 | * lots of complexity to the log code, we just scan the backrefs | 1167 | if (ret) |
974 | * for any file that has been through replay. | 1168 | break; |
975 | * | 1169 | |
976 | * The scan will update the link count on the inode to reflect the | 1170 | leaf = path->nodes[0]; |
977 | * number of back refs found. If it goes down to zero, the iput | 1171 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); |
978 | * will free the inode. | 1172 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); |
979 | */ | 1173 | |
980 | static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | 1174 | while (cur_offset < item_size) { |
981 | struct btrfs_root *root, | 1175 | extref = (struct btrfs_inode_extref *) (ptr + cur_offset); |
982 | struct inode *inode) | 1176 | name_len = btrfs_inode_extref_name_len(leaf, extref); |
1177 | |||
1178 | nlink++; | ||
1179 | |||
1180 | cur_offset += name_len + sizeof(*extref); | ||
1181 | } | ||
1182 | |||
1183 | offset++; | ||
1184 | btrfs_release_path(path); | ||
1185 | } | ||
1186 | btrfs_release_path(path); | ||
1187 | |||
1188 | if (ret < 0) | ||
1189 | return ret; | ||
1190 | return nlink; | ||
1191 | } | ||
1192 | |||
1193 | static int count_inode_refs(struct btrfs_root *root, | ||
1194 | struct inode *inode, struct btrfs_path *path) | ||
983 | { | 1195 | { |
984 | struct btrfs_path *path; | ||
985 | int ret; | 1196 | int ret; |
986 | struct btrfs_key key; | 1197 | struct btrfs_key key; |
987 | u64 nlink = 0; | 1198 | unsigned int nlink = 0; |
988 | unsigned long ptr; | 1199 | unsigned long ptr; |
989 | unsigned long ptr_end; | 1200 | unsigned long ptr_end; |
990 | int name_len; | 1201 | int name_len; |
@@ -994,10 +1205,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
994 | key.type = BTRFS_INODE_REF_KEY; | 1205 | key.type = BTRFS_INODE_REF_KEY; |
995 | key.offset = (u64)-1; | 1206 | key.offset = (u64)-1; |
996 | 1207 | ||
997 | path = btrfs_alloc_path(); | ||
998 | if (!path) | ||
999 | return -ENOMEM; | ||
1000 | |||
1001 | while (1) { | 1208 | while (1) { |
1002 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 1209 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
1003 | if (ret < 0) | 1210 | if (ret < 0) |
@@ -1031,6 +1238,50 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
1031 | btrfs_release_path(path); | 1238 | btrfs_release_path(path); |
1032 | } | 1239 | } |
1033 | btrfs_release_path(path); | 1240 | btrfs_release_path(path); |
1241 | |||
1242 | return nlink; | ||
1243 | } | ||
1244 | |||
1245 | /* | ||
1246 | * There are a few corners where the link count of the file can't | ||
1247 | * be properly maintained during replay. So, instead of adding | ||
1248 | * lots of complexity to the log code, we just scan the backrefs | ||
1249 | * for any file that has been through replay. | ||
1250 | * | ||
1251 | * The scan will update the link count on the inode to reflect the | ||
1252 | * number of back refs found. If it goes down to zero, the iput | ||
1253 | * will free the inode. | ||
1254 | */ | ||
1255 | static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | ||
1256 | struct btrfs_root *root, | ||
1257 | struct inode *inode) | ||
1258 | { | ||
1259 | struct btrfs_path *path; | ||
1260 | int ret; | ||
1261 | u64 nlink = 0; | ||
1262 | u64 ino = btrfs_ino(inode); | ||
1263 | |||
1264 | path = btrfs_alloc_path(); | ||
1265 | if (!path) | ||
1266 | return -ENOMEM; | ||
1267 | |||
1268 | ret = count_inode_refs(root, inode, path); | ||
1269 | if (ret < 0) | ||
1270 | goto out; | ||
1271 | |||
1272 | nlink = ret; | ||
1273 | |||
1274 | ret = count_inode_extrefs(root, inode, path); | ||
1275 | if (ret == -ENOENT) | ||
1276 | ret = 0; | ||
1277 | |||
1278 | if (ret < 0) | ||
1279 | goto out; | ||
1280 | |||
1281 | nlink += ret; | ||
1282 | |||
1283 | ret = 0; | ||
1284 | |||
1034 | if (nlink != inode->i_nlink) { | 1285 | if (nlink != inode->i_nlink) { |
1035 | set_nlink(inode, nlink); | 1286 | set_nlink(inode, nlink); |
1036 | btrfs_update_inode(trans, root, inode); | 1287 | btrfs_update_inode(trans, root, inode); |
@@ -1046,9 +1297,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
1046 | ret = insert_orphan_item(trans, root, ino); | 1297 | ret = insert_orphan_item(trans, root, ino); |
1047 | BUG_ON(ret); | 1298 | BUG_ON(ret); |
1048 | } | 1299 | } |
1049 | btrfs_free_path(path); | ||
1050 | 1300 | ||
1051 | return 0; | 1301 | out: |
1302 | btrfs_free_path(path); | ||
1303 | return ret; | ||
1052 | } | 1304 | } |
1053 | 1305 | ||
1054 | static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, | 1306 | static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, |
@@ -1695,6 +1947,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1695 | ret = add_inode_ref(wc->trans, root, log, path, | 1947 | ret = add_inode_ref(wc->trans, root, log, path, |
1696 | eb, i, &key); | 1948 | eb, i, &key); |
1697 | BUG_ON(ret && ret != -ENOENT); | 1949 | BUG_ON(ret && ret != -ENOENT); |
1950 | } else if (key.type == BTRFS_INODE_EXTREF_KEY) { | ||
1951 | ret = add_inode_ref(wc->trans, root, log, path, | ||
1952 | eb, i, &key); | ||
1953 | BUG_ON(ret && ret != -ENOENT); | ||
1698 | } else if (key.type == BTRFS_EXTENT_DATA_KEY) { | 1954 | } else if (key.type == BTRFS_EXTENT_DATA_KEY) { |
1699 | ret = replay_one_extent(wc->trans, root, path, | 1955 | ret = replay_one_extent(wc->trans, root, path, |
1700 | eb, i, &key); | 1956 | eb, i, &key); |
@@ -2037,7 +2293,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2037 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2293 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
2038 | wait_log_commit(trans, root, root->log_transid - 1); | 2294 | wait_log_commit(trans, root, root->log_transid - 1); |
2039 | while (1) { | 2295 | while (1) { |
2040 | unsigned long batch = root->log_batch; | 2296 | int batch = atomic_read(&root->log_batch); |
2041 | /* when we're on an ssd, just kick the log commit out */ | 2297 | /* when we're on an ssd, just kick the log commit out */ |
2042 | if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { | 2298 | if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { |
2043 | mutex_unlock(&root->log_mutex); | 2299 | mutex_unlock(&root->log_mutex); |
@@ -2045,7 +2301,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2045 | mutex_lock(&root->log_mutex); | 2301 | mutex_lock(&root->log_mutex); |
2046 | } | 2302 | } |
2047 | wait_for_writer(trans, root); | 2303 | wait_for_writer(trans, root); |
2048 | if (batch == root->log_batch) | 2304 | if (batch == atomic_read(&root->log_batch)) |
2049 | break; | 2305 | break; |
2050 | } | 2306 | } |
2051 | 2307 | ||
@@ -2074,7 +2330,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2074 | 2330 | ||
2075 | btrfs_set_root_node(&log->root_item, log->node); | 2331 | btrfs_set_root_node(&log->root_item, log->node); |
2076 | 2332 | ||
2077 | root->log_batch = 0; | ||
2078 | root->log_transid++; | 2333 | root->log_transid++; |
2079 | log->log_transid = root->log_transid; | 2334 | log->log_transid = root->log_transid; |
2080 | root->log_start_pid = 0; | 2335 | root->log_start_pid = 0; |
@@ -2087,7 +2342,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2087 | mutex_unlock(&root->log_mutex); | 2342 | mutex_unlock(&root->log_mutex); |
2088 | 2343 | ||
2089 | mutex_lock(&log_root_tree->log_mutex); | 2344 | mutex_lock(&log_root_tree->log_mutex); |
2090 | log_root_tree->log_batch++; | 2345 | atomic_inc(&log_root_tree->log_batch); |
2091 | atomic_inc(&log_root_tree->log_writers); | 2346 | atomic_inc(&log_root_tree->log_writers); |
2092 | mutex_unlock(&log_root_tree->log_mutex); | 2347 | mutex_unlock(&log_root_tree->log_mutex); |
2093 | 2348 | ||
@@ -2157,7 +2412,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2157 | btrfs_set_super_log_root_level(root->fs_info->super_for_commit, | 2412 | btrfs_set_super_log_root_level(root->fs_info->super_for_commit, |
2158 | btrfs_header_level(log_root_tree->node)); | 2413 | btrfs_header_level(log_root_tree->node)); |
2159 | 2414 | ||
2160 | log_root_tree->log_batch = 0; | ||
2161 | log_root_tree->log_transid++; | 2415 | log_root_tree->log_transid++; |
2162 | smp_mb(); | 2416 | smp_mb(); |
2163 | 2417 | ||
@@ -2171,9 +2425,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2171 | * in and cause problems either. | 2425 | * in and cause problems either. |
2172 | */ | 2426 | */ |
2173 | btrfs_scrub_pause_super(root); | 2427 | btrfs_scrub_pause_super(root); |
2174 | write_ctree_super(trans, root->fs_info->tree_root, 1); | 2428 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); |
2175 | btrfs_scrub_continue_super(root); | 2429 | btrfs_scrub_continue_super(root); |
2176 | ret = 0; | 2430 | if (ret) { |
2431 | btrfs_abort_transaction(trans, root, ret); | ||
2432 | goto out_wake_log_root; | ||
2433 | } | ||
2177 | 2434 | ||
2178 | mutex_lock(&root->log_mutex); | 2435 | mutex_lock(&root->log_mutex); |
2179 | if (root->last_log_commit < log_transid) | 2436 | if (root->last_log_commit < log_transid) |
@@ -2209,7 +2466,8 @@ static void free_log_tree(struct btrfs_trans_handle *trans, | |||
2209 | 2466 | ||
2210 | while (1) { | 2467 | while (1) { |
2211 | ret = find_first_extent_bit(&log->dirty_log_pages, | 2468 | ret = find_first_extent_bit(&log->dirty_log_pages, |
2212 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); | 2469 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW, |
2470 | NULL); | ||
2213 | if (ret) | 2471 | if (ret) |
2214 | break; | 2472 | break; |
2215 | 2473 | ||
@@ -2646,6 +2904,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2646 | int ret; | 2904 | int ret; |
2647 | struct btrfs_key key; | 2905 | struct btrfs_key key; |
2648 | struct btrfs_key found_key; | 2906 | struct btrfs_key found_key; |
2907 | int start_slot; | ||
2649 | 2908 | ||
2650 | key.objectid = objectid; | 2909 | key.objectid = objectid; |
2651 | key.type = max_key_type; | 2910 | key.type = max_key_type; |
@@ -2667,8 +2926,18 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2667 | if (found_key.objectid != objectid) | 2926 | if (found_key.objectid != objectid) |
2668 | break; | 2927 | break; |
2669 | 2928 | ||
2670 | ret = btrfs_del_item(trans, log, path); | 2929 | found_key.offset = 0; |
2671 | if (ret) | 2930 | found_key.type = 0; |
2931 | ret = btrfs_bin_search(path->nodes[0], &found_key, 0, | ||
2932 | &start_slot); | ||
2933 | |||
2934 | ret = btrfs_del_items(trans, log, path, start_slot, | ||
2935 | path->slots[0] - start_slot + 1); | ||
2936 | /* | ||
2937 | * If start slot isn't 0 then we don't need to re-search, we've | ||
2938 | * found the last guy with the objectid in this tree. | ||
2939 | */ | ||
2940 | if (ret || start_slot != 0) | ||
2672 | break; | 2941 | break; |
2673 | btrfs_release_path(path); | 2942 | btrfs_release_path(path); |
2674 | } | 2943 | } |
@@ -2678,14 +2947,64 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2678 | return ret; | 2947 | return ret; |
2679 | } | 2948 | } |
2680 | 2949 | ||
2950 | static void fill_inode_item(struct btrfs_trans_handle *trans, | ||
2951 | struct extent_buffer *leaf, | ||
2952 | struct btrfs_inode_item *item, | ||
2953 | struct inode *inode, int log_inode_only) | ||
2954 | { | ||
2955 | btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); | ||
2956 | btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); | ||
2957 | btrfs_set_inode_mode(leaf, item, inode->i_mode); | ||
2958 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); | ||
2959 | |||
2960 | btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), | ||
2961 | inode->i_atime.tv_sec); | ||
2962 | btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), | ||
2963 | inode->i_atime.tv_nsec); | ||
2964 | |||
2965 | btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), | ||
2966 | inode->i_mtime.tv_sec); | ||
2967 | btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), | ||
2968 | inode->i_mtime.tv_nsec); | ||
2969 | |||
2970 | btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), | ||
2971 | inode->i_ctime.tv_sec); | ||
2972 | btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), | ||
2973 | inode->i_ctime.tv_nsec); | ||
2974 | |||
2975 | btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); | ||
2976 | |||
2977 | btrfs_set_inode_sequence(leaf, item, inode->i_version); | ||
2978 | btrfs_set_inode_transid(leaf, item, trans->transid); | ||
2979 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | ||
2980 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | ||
2981 | btrfs_set_inode_block_group(leaf, item, 0); | ||
2982 | |||
2983 | if (log_inode_only) { | ||
2984 | /* set the generation to zero so the recover code | ||
2985 | * can tell the difference between an logging | ||
2986 | * just to say 'this inode exists' and a logging | ||
2987 | * to say 'update this inode with these values' | ||
2988 | */ | ||
2989 | btrfs_set_inode_generation(leaf, item, 0); | ||
2990 | btrfs_set_inode_size(leaf, item, 0); | ||
2991 | } else { | ||
2992 | btrfs_set_inode_generation(leaf, item, | ||
2993 | BTRFS_I(inode)->generation); | ||
2994 | btrfs_set_inode_size(leaf, item, inode->i_size); | ||
2995 | } | ||
2996 | |||
2997 | } | ||
2998 | |||
2681 | static noinline int copy_items(struct btrfs_trans_handle *trans, | 2999 | static noinline int copy_items(struct btrfs_trans_handle *trans, |
2682 | struct btrfs_root *log, | 3000 | struct inode *inode, |
2683 | struct btrfs_path *dst_path, | 3001 | struct btrfs_path *dst_path, |
2684 | struct extent_buffer *src, | 3002 | struct extent_buffer *src, |
2685 | int start_slot, int nr, int inode_only) | 3003 | int start_slot, int nr, int inode_only) |
2686 | { | 3004 | { |
2687 | unsigned long src_offset; | 3005 | unsigned long src_offset; |
2688 | unsigned long dst_offset; | 3006 | unsigned long dst_offset; |
3007 | struct btrfs_root *log = BTRFS_I(inode)->root->log_root; | ||
2689 | struct btrfs_file_extent_item *extent; | 3008 | struct btrfs_file_extent_item *extent; |
2690 | struct btrfs_inode_item *inode_item; | 3009 | struct btrfs_inode_item *inode_item; |
2691 | int ret; | 3010 | int ret; |
@@ -2694,6 +3013,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2694 | char *ins_data; | 3013 | char *ins_data; |
2695 | int i; | 3014 | int i; |
2696 | struct list_head ordered_sums; | 3015 | struct list_head ordered_sums; |
3016 | int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
2697 | 3017 | ||
2698 | INIT_LIST_HEAD(&ordered_sums); | 3018 | INIT_LIST_HEAD(&ordered_sums); |
2699 | 3019 | ||
@@ -2722,29 +3042,23 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2722 | 3042 | ||
2723 | src_offset = btrfs_item_ptr_offset(src, start_slot + i); | 3043 | src_offset = btrfs_item_ptr_offset(src, start_slot + i); |
2724 | 3044 | ||
2725 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, | 3045 | if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { |
2726 | src_offset, ins_sizes[i]); | ||
2727 | |||
2728 | if (inode_only == LOG_INODE_EXISTS && | ||
2729 | ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { | ||
2730 | inode_item = btrfs_item_ptr(dst_path->nodes[0], | 3046 | inode_item = btrfs_item_ptr(dst_path->nodes[0], |
2731 | dst_path->slots[0], | 3047 | dst_path->slots[0], |
2732 | struct btrfs_inode_item); | 3048 | struct btrfs_inode_item); |
2733 | btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0); | 3049 | fill_inode_item(trans, dst_path->nodes[0], inode_item, |
2734 | 3050 | inode, inode_only == LOG_INODE_EXISTS); | |
2735 | /* set the generation to zero so the recover code | 3051 | } else { |
2736 | * can tell the difference between an logging | 3052 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, |
2737 | * just to say 'this inode exists' and a logging | 3053 | src_offset, ins_sizes[i]); |
2738 | * to say 'update this inode with these values' | ||
2739 | */ | ||
2740 | btrfs_set_inode_generation(dst_path->nodes[0], | ||
2741 | inode_item, 0); | ||
2742 | } | 3054 | } |
3055 | |||
2743 | /* take a reference on file data extents so that truncates | 3056 | /* take a reference on file data extents so that truncates |
2744 | * or deletes of this inode don't have to relog the inode | 3057 | * or deletes of this inode don't have to relog the inode |
2745 | * again | 3058 | * again |
2746 | */ | 3059 | */ |
2747 | if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY) { | 3060 | if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY && |
3061 | !skip_csum) { | ||
2748 | int found_type; | 3062 | int found_type; |
2749 | extent = btrfs_item_ptr(src, start_slot + i, | 3063 | extent = btrfs_item_ptr(src, start_slot + i, |
2750 | struct btrfs_file_extent_item); | 3064 | struct btrfs_file_extent_item); |
@@ -2753,8 +3067,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2753 | continue; | 3067 | continue; |
2754 | 3068 | ||
2755 | found_type = btrfs_file_extent_type(src, extent); | 3069 | found_type = btrfs_file_extent_type(src, extent); |
2756 | if (found_type == BTRFS_FILE_EXTENT_REG || | 3070 | if (found_type == BTRFS_FILE_EXTENT_REG) { |
2757 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
2758 | u64 ds, dl, cs, cl; | 3071 | u64 ds, dl, cs, cl; |
2759 | ds = btrfs_file_extent_disk_bytenr(src, | 3072 | ds = btrfs_file_extent_disk_bytenr(src, |
2760 | extent); | 3073 | extent); |
@@ -2803,6 +3116,239 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2803 | return ret; | 3116 | return ret; |
2804 | } | 3117 | } |
2805 | 3118 | ||
3119 | static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
3120 | { | ||
3121 | struct extent_map *em1, *em2; | ||
3122 | |||
3123 | em1 = list_entry(a, struct extent_map, list); | ||
3124 | em2 = list_entry(b, struct extent_map, list); | ||
3125 | |||
3126 | if (em1->start < em2->start) | ||
3127 | return -1; | ||
3128 | else if (em1->start > em2->start) | ||
3129 | return 1; | ||
3130 | return 0; | ||
3131 | } | ||
3132 | |||
3133 | struct log_args { | ||
3134 | struct extent_buffer *src; | ||
3135 | u64 next_offset; | ||
3136 | int start_slot; | ||
3137 | int nr; | ||
3138 | }; | ||
3139 | |||
3140 | static int log_one_extent(struct btrfs_trans_handle *trans, | ||
3141 | struct inode *inode, struct btrfs_root *root, | ||
3142 | struct extent_map *em, struct btrfs_path *path, | ||
3143 | struct btrfs_path *dst_path, struct log_args *args) | ||
3144 | { | ||
3145 | struct btrfs_root *log = root->log_root; | ||
3146 | struct btrfs_file_extent_item *fi; | ||
3147 | struct btrfs_key key; | ||
3148 | u64 start = em->mod_start; | ||
3149 | u64 search_start = start; | ||
3150 | u64 len = em->mod_len; | ||
3151 | u64 num_bytes; | ||
3152 | int nritems; | ||
3153 | int ret; | ||
3154 | |||
3155 | if (BTRFS_I(inode)->logged_trans == trans->transid) { | ||
3156 | ret = __btrfs_drop_extents(trans, log, inode, dst_path, start, | ||
3157 | start + len, NULL, 0); | ||
3158 | if (ret) | ||
3159 | return ret; | ||
3160 | } | ||
3161 | |||
3162 | while (len) { | ||
3163 | if (args->nr) | ||
3164 | goto next_slot; | ||
3165 | again: | ||
3166 | key.objectid = btrfs_ino(inode); | ||
3167 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
3168 | key.offset = search_start; | ||
3169 | |||
3170 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
3171 | if (ret < 0) | ||
3172 | return ret; | ||
3173 | |||
3174 | if (ret) { | ||
3175 | /* | ||
3176 | * A rare case were we can have an em for a section of a | ||
3177 | * larger extent so we need to make sure that this em | ||
3178 | * falls within the extent we've found. If not we just | ||
3179 | * bail and go back to ye-olde way of doing things but | ||
3180 | * it happens often enough in testing that we need to do | ||
3181 | * this dance to make sure. | ||
3182 | */ | ||
3183 | do { | ||
3184 | if (path->slots[0] == 0) { | ||
3185 | btrfs_release_path(path); | ||
3186 | if (search_start == 0) | ||
3187 | return -ENOENT; | ||
3188 | search_start--; | ||
3189 | goto again; | ||
3190 | } | ||
3191 | |||
3192 | path->slots[0]--; | ||
3193 | btrfs_item_key_to_cpu(path->nodes[0], &key, | ||
3194 | path->slots[0]); | ||
3195 | if (key.objectid != btrfs_ino(inode) || | ||
3196 | key.type != BTRFS_EXTENT_DATA_KEY) { | ||
3197 | btrfs_release_path(path); | ||
3198 | return -ENOENT; | ||
3199 | } | ||
3200 | } while (key.offset > start); | ||
3201 | |||
3202 | fi = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
3203 | struct btrfs_file_extent_item); | ||
3204 | num_bytes = btrfs_file_extent_num_bytes(path->nodes[0], | ||
3205 | fi); | ||
3206 | if (key.offset + num_bytes <= start) { | ||
3207 | btrfs_release_path(path); | ||
3208 | return -ENOENT; | ||
3209 | } | ||
3210 | } | ||
3211 | args->src = path->nodes[0]; | ||
3212 | next_slot: | ||
3213 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
3214 | fi = btrfs_item_ptr(args->src, path->slots[0], | ||
3215 | struct btrfs_file_extent_item); | ||
3216 | if (args->nr && | ||
3217 | args->start_slot + args->nr == path->slots[0]) { | ||
3218 | args->nr++; | ||
3219 | } else if (args->nr) { | ||
3220 | ret = copy_items(trans, inode, dst_path, args->src, | ||
3221 | args->start_slot, args->nr, | ||
3222 | LOG_INODE_ALL); | ||
3223 | if (ret) | ||
3224 | return ret; | ||
3225 | args->nr = 1; | ||
3226 | args->start_slot = path->slots[0]; | ||
3227 | } else if (!args->nr) { | ||
3228 | args->nr = 1; | ||
3229 | args->start_slot = path->slots[0]; | ||
3230 | } | ||
3231 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
3232 | path->slots[0]++; | ||
3233 | num_bytes = btrfs_file_extent_num_bytes(args->src, fi); | ||
3234 | if (len < num_bytes) { | ||
3235 | /* I _think_ this is ok, envision we write to a | ||
3236 | * preallocated space that is adjacent to a previously | ||
3237 | * written preallocated space that gets merged when we | ||
3238 | * mark this preallocated space written. If we do not | ||
3239 | * have the adjacent extent in cache then when we copy | ||
3240 | * this extent it could end up being larger than our EM | ||
3241 | * thinks it is, which is a-ok, so just set len to 0. | ||
3242 | */ | ||
3243 | len = 0; | ||
3244 | } else { | ||
3245 | len -= num_bytes; | ||
3246 | } | ||
3247 | start = key.offset + num_bytes; | ||
3248 | args->next_offset = start; | ||
3249 | search_start = start; | ||
3250 | |||
3251 | if (path->slots[0] < nritems) { | ||
3252 | if (len) | ||
3253 | goto next_slot; | ||
3254 | break; | ||
3255 | } | ||
3256 | |||
3257 | if (args->nr) { | ||
3258 | ret = copy_items(trans, inode, dst_path, args->src, | ||
3259 | args->start_slot, args->nr, | ||
3260 | LOG_INODE_ALL); | ||
3261 | if (ret) | ||
3262 | return ret; | ||
3263 | args->nr = 0; | ||
3264 | btrfs_release_path(path); | ||
3265 | } | ||
3266 | } | ||
3267 | |||
3268 | return 0; | ||
3269 | } | ||
3270 | |||
3271 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | ||
3272 | struct btrfs_root *root, | ||
3273 | struct inode *inode, | ||
3274 | struct btrfs_path *path, | ||
3275 | struct btrfs_path *dst_path) | ||
3276 | { | ||
3277 | struct log_args args; | ||
3278 | struct extent_map *em, *n; | ||
3279 | struct list_head extents; | ||
3280 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | ||
3281 | u64 test_gen; | ||
3282 | int ret = 0; | ||
3283 | |||
3284 | INIT_LIST_HEAD(&extents); | ||
3285 | |||
3286 | memset(&args, 0, sizeof(args)); | ||
3287 | |||
3288 | write_lock(&tree->lock); | ||
3289 | test_gen = root->fs_info->last_trans_committed; | ||
3290 | |||
3291 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) { | ||
3292 | list_del_init(&em->list); | ||
3293 | if (em->generation <= test_gen) | ||
3294 | continue; | ||
3295 | /* Need a ref to keep it from getting evicted from cache */ | ||
3296 | atomic_inc(&em->refs); | ||
3297 | set_bit(EXTENT_FLAG_LOGGING, &em->flags); | ||
3298 | list_add_tail(&em->list, &extents); | ||
3299 | } | ||
3300 | |||
3301 | list_sort(NULL, &extents, extent_cmp); | ||
3302 | |||
3303 | while (!list_empty(&extents)) { | ||
3304 | em = list_entry(extents.next, struct extent_map, list); | ||
3305 | |||
3306 | list_del_init(&em->list); | ||
3307 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); | ||
3308 | |||
3309 | /* | ||
3310 | * If we had an error we just need to delete everybody from our | ||
3311 | * private list. | ||
3312 | */ | ||
3313 | if (ret) { | ||
3314 | free_extent_map(em); | ||
3315 | continue; | ||
3316 | } | ||
3317 | |||
3318 | write_unlock(&tree->lock); | ||
3319 | |||
3320 | /* | ||
3321 | * If the previous EM and the last extent we left off on aren't | ||
3322 | * sequential then we need to copy the items we have and redo | ||
3323 | * our search | ||
3324 | */ | ||
3325 | if (args.nr && em->mod_start != args.next_offset) { | ||
3326 | ret = copy_items(trans, inode, dst_path, args.src, | ||
3327 | args.start_slot, args.nr, | ||
3328 | LOG_INODE_ALL); | ||
3329 | if (ret) { | ||
3330 | free_extent_map(em); | ||
3331 | write_lock(&tree->lock); | ||
3332 | continue; | ||
3333 | } | ||
3334 | btrfs_release_path(path); | ||
3335 | args.nr = 0; | ||
3336 | } | ||
3337 | |||
3338 | ret = log_one_extent(trans, inode, root, em, path, dst_path, &args); | ||
3339 | free_extent_map(em); | ||
3340 | write_lock(&tree->lock); | ||
3341 | } | ||
3342 | WARN_ON(!list_empty(&extents)); | ||
3343 | write_unlock(&tree->lock); | ||
3344 | |||
3345 | if (!ret && args.nr) | ||
3346 | ret = copy_items(trans, inode, dst_path, args.src, | ||
3347 | args.start_slot, args.nr, LOG_INODE_ALL); | ||
3348 | btrfs_release_path(path); | ||
3349 | return ret; | ||
3350 | } | ||
3351 | |||
2806 | /* log a single inode in the tree log. | 3352 | /* log a single inode in the tree log. |
2807 | * At least one parent directory for this inode must exist in the tree | 3353 | * At least one parent directory for this inode must exist in the tree |
2808 | * or be logged already. | 3354 | * or be logged already. |
@@ -2832,6 +3378,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2832 | int nritems; | 3378 | int nritems; |
2833 | int ins_start_slot = 0; | 3379 | int ins_start_slot = 0; |
2834 | int ins_nr; | 3380 | int ins_nr; |
3381 | bool fast_search = false; | ||
2835 | u64 ino = btrfs_ino(inode); | 3382 | u64 ino = btrfs_ino(inode); |
2836 | 3383 | ||
2837 | log = root->log_root; | 3384 | log = root->log_root; |
@@ -2851,21 +3398,23 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2851 | 3398 | ||
2852 | max_key.objectid = ino; | 3399 | max_key.objectid = ino; |
2853 | 3400 | ||
2854 | /* today the code can only do partial logging of directories */ | ||
2855 | if (!S_ISDIR(inode->i_mode)) | ||
2856 | inode_only = LOG_INODE_ALL; | ||
2857 | 3401 | ||
3402 | /* today the code can only do partial logging of directories */ | ||
2858 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) | 3403 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) |
2859 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 3404 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
2860 | else | 3405 | else |
2861 | max_key.type = (u8)-1; | 3406 | max_key.type = (u8)-1; |
2862 | max_key.offset = (u64)-1; | 3407 | max_key.offset = (u64)-1; |
2863 | 3408 | ||
2864 | ret = btrfs_commit_inode_delayed_items(trans, inode); | 3409 | /* Only run delayed items if we are a dir or a new file */ |
2865 | if (ret) { | 3410 | if (S_ISDIR(inode->i_mode) || |
2866 | btrfs_free_path(path); | 3411 | BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { |
2867 | btrfs_free_path(dst_path); | 3412 | ret = btrfs_commit_inode_delayed_items(trans, inode); |
2868 | return ret; | 3413 | if (ret) { |
3414 | btrfs_free_path(path); | ||
3415 | btrfs_free_path(dst_path); | ||
3416 | return ret; | ||
3417 | } | ||
2869 | } | 3418 | } |
2870 | 3419 | ||
2871 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 3420 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
@@ -2881,7 +3430,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2881 | max_key_type = BTRFS_XATTR_ITEM_KEY; | 3430 | max_key_type = BTRFS_XATTR_ITEM_KEY; |
2882 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); | 3431 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); |
2883 | } else { | 3432 | } else { |
2884 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); | 3433 | if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
3434 | &BTRFS_I(inode)->runtime_flags)) { | ||
3435 | ret = btrfs_truncate_inode_items(trans, log, | ||
3436 | inode, 0, 0); | ||
3437 | } else { | ||
3438 | fast_search = true; | ||
3439 | max_key.type = BTRFS_XATTR_ITEM_KEY; | ||
3440 | ret = drop_objectid_items(trans, log, path, ino, | ||
3441 | BTRFS_XATTR_ITEM_KEY); | ||
3442 | } | ||
2885 | } | 3443 | } |
2886 | if (ret) { | 3444 | if (ret) { |
2887 | err = ret; | 3445 | err = ret; |
@@ -2912,7 +3470,7 @@ again: | |||
2912 | goto next_slot; | 3470 | goto next_slot; |
2913 | } | 3471 | } |
2914 | 3472 | ||
2915 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, | 3473 | ret = copy_items(trans, inode, dst_path, src, ins_start_slot, |
2916 | ins_nr, inode_only); | 3474 | ins_nr, inode_only); |
2917 | if (ret) { | 3475 | if (ret) { |
2918 | err = ret; | 3476 | err = ret; |
@@ -2930,7 +3488,7 @@ next_slot: | |||
2930 | goto again; | 3488 | goto again; |
2931 | } | 3489 | } |
2932 | if (ins_nr) { | 3490 | if (ins_nr) { |
2933 | ret = copy_items(trans, log, dst_path, src, | 3491 | ret = copy_items(trans, inode, dst_path, src, |
2934 | ins_start_slot, | 3492 | ins_start_slot, |
2935 | ins_nr, inode_only); | 3493 | ins_nr, inode_only); |
2936 | if (ret) { | 3494 | if (ret) { |
@@ -2951,8 +3509,7 @@ next_slot: | |||
2951 | break; | 3509 | break; |
2952 | } | 3510 | } |
2953 | if (ins_nr) { | 3511 | if (ins_nr) { |
2954 | ret = copy_items(trans, log, dst_path, src, | 3512 | ret = copy_items(trans, inode, dst_path, src, ins_start_slot, |
2955 | ins_start_slot, | ||
2956 | ins_nr, inode_only); | 3513 | ins_nr, inode_only); |
2957 | if (ret) { | 3514 | if (ret) { |
2958 | err = ret; | 3515 | err = ret; |
@@ -2960,7 +3517,24 @@ next_slot: | |||
2960 | } | 3517 | } |
2961 | ins_nr = 0; | 3518 | ins_nr = 0; |
2962 | } | 3519 | } |
2963 | WARN_ON(ins_nr); | 3520 | |
3521 | if (fast_search) { | ||
3522 | btrfs_release_path(path); | ||
3523 | btrfs_release_path(dst_path); | ||
3524 | ret = btrfs_log_changed_extents(trans, root, inode, path, | ||
3525 | dst_path); | ||
3526 | if (ret) { | ||
3527 | err = ret; | ||
3528 | goto out_unlock; | ||
3529 | } | ||
3530 | } else { | ||
3531 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | ||
3532 | struct extent_map *em, *n; | ||
3533 | |||
3534 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) | ||
3535 | list_del_init(&em->list); | ||
3536 | } | ||
3537 | |||
2964 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | 3538 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { |
2965 | btrfs_release_path(path); | 3539 | btrfs_release_path(path); |
2966 | btrfs_release_path(dst_path); | 3540 | btrfs_release_path(dst_path); |
@@ -2971,6 +3545,7 @@ next_slot: | |||
2971 | } | 3545 | } |
2972 | } | 3546 | } |
2973 | BTRFS_I(inode)->logged_trans = trans->transid; | 3547 | BTRFS_I(inode)->logged_trans = trans->transid; |
3548 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | ||
2974 | out_unlock: | 3549 | out_unlock: |
2975 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 3550 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2976 | 3551 | ||
@@ -3138,7 +3713,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
3138 | end_trans: | 3713 | end_trans: |
3139 | dput(old_parent); | 3714 | dput(old_parent); |
3140 | if (ret < 0) { | 3715 | if (ret < 0) { |
3141 | BUG_ON(ret != -ENOSPC); | 3716 | WARN_ON(ret != -ENOSPC); |
3142 | root->fs_info->last_trans_log_full_commit = trans->transid; | 3717 | root->fs_info->last_trans_log_full_commit = trans->transid; |
3143 | ret = 1; | 3718 | ret = 1; |
3144 | } | 3719 | } |
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index ab942f46b3dd..99be4c138db6 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c | |||
@@ -143,14 +143,13 @@ EXPORT_SYMBOL(ulist_free); | |||
143 | * In case of allocation failure -ENOMEM is returned and the ulist stays | 143 | * In case of allocation failure -ENOMEM is returned and the ulist stays |
144 | * unaltered. | 144 | * unaltered. |
145 | */ | 145 | */ |
146 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | 146 | int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask) |
147 | gfp_t gfp_mask) | ||
148 | { | 147 | { |
149 | return ulist_add_merge(ulist, val, aux, NULL, gfp_mask); | 148 | return ulist_add_merge(ulist, val, aux, NULL, gfp_mask); |
150 | } | 149 | } |
151 | 150 | ||
152 | int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, | 151 | int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, |
153 | unsigned long *old_aux, gfp_t gfp_mask) | 152 | u64 *old_aux, gfp_t gfp_mask) |
154 | { | 153 | { |
155 | int i; | 154 | int i; |
156 | 155 | ||
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h index 21bdc8ec8130..21a1963439c3 100644 --- a/fs/btrfs/ulist.h +++ b/fs/btrfs/ulist.h | |||
@@ -33,7 +33,7 @@ struct ulist_iterator { | |||
33 | */ | 33 | */ |
34 | struct ulist_node { | 34 | struct ulist_node { |
35 | u64 val; /* value to store */ | 35 | u64 val; /* value to store */ |
36 | unsigned long aux; /* auxiliary value saved along with the val */ | 36 | u64 aux; /* auxiliary value saved along with the val */ |
37 | }; | 37 | }; |
38 | 38 | ||
39 | struct ulist { | 39 | struct ulist { |
@@ -65,10 +65,9 @@ void ulist_fini(struct ulist *ulist); | |||
65 | void ulist_reinit(struct ulist *ulist); | 65 | void ulist_reinit(struct ulist *ulist); |
66 | struct ulist *ulist_alloc(gfp_t gfp_mask); | 66 | struct ulist *ulist_alloc(gfp_t gfp_mask); |
67 | void ulist_free(struct ulist *ulist); | 67 | void ulist_free(struct ulist *ulist); |
68 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | 68 | int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask); |
69 | gfp_t gfp_mask); | 69 | int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, |
70 | int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, | 70 | u64 *old_aux, gfp_t gfp_mask); |
71 | unsigned long *old_aux, gfp_t gfp_mask); | ||
72 | struct ulist_node *ulist_next(struct ulist *ulist, | 71 | struct ulist_node *ulist_next(struct ulist *ulist, |
73 | struct ulist_iterator *uiter); | 72 | struct ulist_iterator *uiter); |
74 | 73 | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 88b969aeeb71..029b903a4ae3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -639,7 +639,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
639 | 639 | ||
640 | bdev = blkdev_get_by_path(device->name->str, flags, holder); | 640 | bdev = blkdev_get_by_path(device->name->str, flags, holder); |
641 | if (IS_ERR(bdev)) { | 641 | if (IS_ERR(bdev)) { |
642 | printk(KERN_INFO "open %s failed\n", device->name->str); | 642 | printk(KERN_INFO "btrfs: open %s failed\n", device->name->str); |
643 | goto error; | 643 | goto error; |
644 | } | 644 | } |
645 | filemap_write_and_wait(bdev->bd_inode->i_mapping); | 645 | filemap_write_and_wait(bdev->bd_inode->i_mapping); |
@@ -1475,6 +1475,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1475 | free_fs_devices(cur_devices); | 1475 | free_fs_devices(cur_devices); |
1476 | } | 1476 | } |
1477 | 1477 | ||
1478 | root->fs_info->num_tolerated_disk_barrier_failures = | ||
1479 | btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); | ||
1480 | |||
1478 | /* | 1481 | /* |
1479 | * at this point, the device is zero sized. We want to | 1482 | * at this point, the device is zero sized. We want to |
1480 | * remove it from the devices list and zero out the old super | 1483 | * remove it from the devices list and zero out the old super |
@@ -1775,15 +1778,21 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1775 | 1778 | ||
1776 | if (seeding_dev) { | 1779 | if (seeding_dev) { |
1777 | ret = init_first_rw_device(trans, root, device); | 1780 | ret = init_first_rw_device(trans, root, device); |
1778 | if (ret) | 1781 | if (ret) { |
1782 | btrfs_abort_transaction(trans, root, ret); | ||
1779 | goto error_trans; | 1783 | goto error_trans; |
1784 | } | ||
1780 | ret = btrfs_finish_sprout(trans, root); | 1785 | ret = btrfs_finish_sprout(trans, root); |
1781 | if (ret) | 1786 | if (ret) { |
1787 | btrfs_abort_transaction(trans, root, ret); | ||
1782 | goto error_trans; | 1788 | goto error_trans; |
1789 | } | ||
1783 | } else { | 1790 | } else { |
1784 | ret = btrfs_add_device(trans, root, device); | 1791 | ret = btrfs_add_device(trans, root, device); |
1785 | if (ret) | 1792 | if (ret) { |
1793 | btrfs_abort_transaction(trans, root, ret); | ||
1786 | goto error_trans; | 1794 | goto error_trans; |
1795 | } | ||
1787 | } | 1796 | } |
1788 | 1797 | ||
1789 | /* | 1798 | /* |
@@ -1793,6 +1802,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1793 | btrfs_clear_space_info_full(root->fs_info); | 1802 | btrfs_clear_space_info_full(root->fs_info); |
1794 | 1803 | ||
1795 | unlock_chunks(root); | 1804 | unlock_chunks(root); |
1805 | root->fs_info->num_tolerated_disk_barrier_failures = | ||
1806 | btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); | ||
1796 | ret = btrfs_commit_transaction(trans, root); | 1807 | ret = btrfs_commit_transaction(trans, root); |
1797 | 1808 | ||
1798 | if (seeding_dev) { | 1809 | if (seeding_dev) { |
@@ -1814,7 +1825,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1814 | 1825 | ||
1815 | error_trans: | 1826 | error_trans: |
1816 | unlock_chunks(root); | 1827 | unlock_chunks(root); |
1817 | btrfs_abort_transaction(trans, root, ret); | ||
1818 | btrfs_end_transaction(trans, root); | 1828 | btrfs_end_transaction(trans, root); |
1819 | rcu_string_free(device->name); | 1829 | rcu_string_free(device->name); |
1820 | kfree(device); | 1830 | kfree(device); |
@@ -2804,6 +2814,26 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
2804 | } | 2814 | } |
2805 | } | 2815 | } |
2806 | 2816 | ||
2817 | if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { | ||
2818 | int num_tolerated_disk_barrier_failures; | ||
2819 | u64 target = bctl->sys.target; | ||
2820 | |||
2821 | num_tolerated_disk_barrier_failures = | ||
2822 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); | ||
2823 | if (num_tolerated_disk_barrier_failures > 0 && | ||
2824 | (target & | ||
2825 | (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 | | ||
2826 | BTRFS_AVAIL_ALLOC_BIT_SINGLE))) | ||
2827 | num_tolerated_disk_barrier_failures = 0; | ||
2828 | else if (num_tolerated_disk_barrier_failures > 1 && | ||
2829 | (target & | ||
2830 | (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10))) | ||
2831 | num_tolerated_disk_barrier_failures = 1; | ||
2832 | |||
2833 | fs_info->num_tolerated_disk_barrier_failures = | ||
2834 | num_tolerated_disk_barrier_failures; | ||
2835 | } | ||
2836 | |||
2807 | ret = insert_balance_item(fs_info->tree_root, bctl); | 2837 | ret = insert_balance_item(fs_info->tree_root, bctl); |
2808 | if (ret && ret != -EEXIST) | 2838 | if (ret && ret != -EEXIST) |
2809 | goto out; | 2839 | goto out; |
@@ -2836,6 +2866,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
2836 | __cancel_balance(fs_info); | 2866 | __cancel_balance(fs_info); |
2837 | } | 2867 | } |
2838 | 2868 | ||
2869 | if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { | ||
2870 | fs_info->num_tolerated_disk_barrier_failures = | ||
2871 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); | ||
2872 | } | ||
2873 | |||
2839 | wake_up(&fs_info->balance_wait_q); | 2874 | wake_up(&fs_info->balance_wait_q); |
2840 | 2875 | ||
2841 | return ret; | 2876 | return ret; |
@@ -3608,12 +3643,16 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
3608 | ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, | 3643 | ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, |
3609 | &sys_chunk_size, &sys_stripe_size, | 3644 | &sys_chunk_size, &sys_stripe_size, |
3610 | sys_chunk_offset, alloc_profile); | 3645 | sys_chunk_offset, alloc_profile); |
3611 | if (ret) | 3646 | if (ret) { |
3612 | goto abort; | 3647 | btrfs_abort_transaction(trans, root, ret); |
3648 | goto out; | ||
3649 | } | ||
3613 | 3650 | ||
3614 | ret = btrfs_add_device(trans, fs_info->chunk_root, device); | 3651 | ret = btrfs_add_device(trans, fs_info->chunk_root, device); |
3615 | if (ret) | 3652 | if (ret) { |
3616 | goto abort; | 3653 | btrfs_abort_transaction(trans, root, ret); |
3654 | goto out; | ||
3655 | } | ||
3617 | 3656 | ||
3618 | /* | 3657 | /* |
3619 | * Modifying chunk tree needs allocating new blocks from both | 3658 | * Modifying chunk tree needs allocating new blocks from both |
@@ -3623,19 +3662,19 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
3623 | */ | 3662 | */ |
3624 | ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, | 3663 | ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, |
3625 | chunk_size, stripe_size); | 3664 | chunk_size, stripe_size); |
3626 | if (ret) | 3665 | if (ret) { |
3627 | goto abort; | 3666 | btrfs_abort_transaction(trans, root, ret); |
3667 | goto out; | ||
3668 | } | ||
3628 | 3669 | ||
3629 | ret = __finish_chunk_alloc(trans, extent_root, sys_map, | 3670 | ret = __finish_chunk_alloc(trans, extent_root, sys_map, |
3630 | sys_chunk_offset, sys_chunk_size, | 3671 | sys_chunk_offset, sys_chunk_size, |
3631 | sys_stripe_size); | 3672 | sys_stripe_size); |
3632 | if (ret) | 3673 | if (ret) |
3633 | goto abort; | 3674 | btrfs_abort_transaction(trans, root, ret); |
3634 | 3675 | ||
3635 | return 0; | 3676 | out: |
3636 | 3677 | ||
3637 | abort: | ||
3638 | btrfs_abort_transaction(trans, root, ret); | ||
3639 | return ret; | 3678 | return ret; |
3640 | } | 3679 | } |
3641 | 3680 | ||
@@ -3760,7 +3799,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
3760 | read_unlock(&em_tree->lock); | 3799 | read_unlock(&em_tree->lock); |
3761 | 3800 | ||
3762 | if (!em) { | 3801 | if (!em) { |
3763 | printk(KERN_CRIT "unable to find logical %llu len %llu\n", | 3802 | printk(KERN_CRIT "btrfs: unable to find logical %llu len %llu\n", |
3764 | (unsigned long long)logical, | 3803 | (unsigned long long)logical, |
3765 | (unsigned long long)*length); | 3804 | (unsigned long long)*length); |
3766 | BUG(); | 3805 | BUG(); |
@@ -4217,7 +4256,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
4217 | 4256 | ||
4218 | total_devs = bbio->num_stripes; | 4257 | total_devs = bbio->num_stripes; |
4219 | if (map_length < length) { | 4258 | if (map_length < length) { |
4220 | printk(KERN_CRIT "mapping failed logical %llu bio len %llu " | 4259 | printk(KERN_CRIT "btrfs: mapping failed logical %llu bio len %llu " |
4221 | "len %llu\n", (unsigned long long)logical, | 4260 | "len %llu\n", (unsigned long long)logical, |
4222 | (unsigned long long)length, | 4261 | (unsigned long long)length, |
4223 | (unsigned long long)map_length); | 4262 | (unsigned long long)map_length); |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 92c20654cc55..9acb846c3e7f 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -97,7 +97,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
97 | *total_in = 0; | 97 | *total_in = 0; |
98 | 98 | ||
99 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 99 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { |
100 | printk(KERN_WARNING "deflateInit failed\n"); | 100 | printk(KERN_WARNING "btrfs: deflateInit failed\n"); |
101 | ret = -1; | 101 | ret = -1; |
102 | goto out; | 102 | goto out; |
103 | } | 103 | } |
@@ -125,7 +125,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
125 | while (workspace->def_strm.total_in < len) { | 125 | while (workspace->def_strm.total_in < len) { |
126 | ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); | 126 | ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); |
127 | if (ret != Z_OK) { | 127 | if (ret != Z_OK) { |
128 | printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", | 128 | printk(KERN_DEBUG "btrfs: deflate in loop returned %d\n", |
129 | ret); | 129 | ret); |
130 | zlib_deflateEnd(&workspace->def_strm); | 130 | zlib_deflateEnd(&workspace->def_strm); |
131 | ret = -1; | 131 | ret = -1; |
@@ -252,7 +252,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
252 | } | 252 | } |
253 | 253 | ||
254 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 254 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
255 | printk(KERN_WARNING "inflateInit failed\n"); | 255 | printk(KERN_WARNING "btrfs: inflateInit failed\n"); |
256 | return -1; | 256 | return -1; |
257 | } | 257 | } |
258 | while (workspace->inf_strm.total_in < srclen) { | 258 | while (workspace->inf_strm.total_in < srclen) { |
@@ -336,7 +336,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
336 | } | 336 | } |
337 | 337 | ||
338 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 338 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
339 | printk(KERN_WARNING "inflateInit failed\n"); | 339 | printk(KERN_WARNING "btrfs: inflateInit failed\n"); |
340 | return -1; | 340 | return -1; |
341 | } | 341 | } |
342 | 342 | ||
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 8e1b60e557b6..02ce90972d81 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -99,7 +99,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, | |||
99 | * FIXME: we should try harder by querying the mds for the ino. | 99 | * FIXME: we should try harder by querying the mds for the ino. |
100 | */ | 100 | */ |
101 | static struct dentry *__fh_to_dentry(struct super_block *sb, | 101 | static struct dentry *__fh_to_dentry(struct super_block *sb, |
102 | struct ceph_nfs_fh *fh) | 102 | struct ceph_nfs_fh *fh, int fh_len) |
103 | { | 103 | { |
104 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; | 104 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; |
105 | struct inode *inode; | 105 | struct inode *inode; |
@@ -107,6 +107,9 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
107 | struct ceph_vino vino; | 107 | struct ceph_vino vino; |
108 | int err; | 108 | int err; |
109 | 109 | ||
110 | if (fh_len < sizeof(*fh) / 4) | ||
111 | return ERR_PTR(-ESTALE); | ||
112 | |||
110 | dout("__fh_to_dentry %llx\n", fh->ino); | 113 | dout("__fh_to_dentry %llx\n", fh->ino); |
111 | vino.ino = fh->ino; | 114 | vino.ino = fh->ino; |
112 | vino.snap = CEPH_NOSNAP; | 115 | vino.snap = CEPH_NOSNAP; |
@@ -150,7 +153,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
150 | * convert connectable fh to dentry | 153 | * convert connectable fh to dentry |
151 | */ | 154 | */ |
152 | static struct dentry *__cfh_to_dentry(struct super_block *sb, | 155 | static struct dentry *__cfh_to_dentry(struct super_block *sb, |
153 | struct ceph_nfs_confh *cfh) | 156 | struct ceph_nfs_confh *cfh, int fh_len) |
154 | { | 157 | { |
155 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; | 158 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; |
156 | struct inode *inode; | 159 | struct inode *inode; |
@@ -158,6 +161,9 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, | |||
158 | struct ceph_vino vino; | 161 | struct ceph_vino vino; |
159 | int err; | 162 | int err; |
160 | 163 | ||
164 | if (fh_len < sizeof(*cfh) / 4) | ||
165 | return ERR_PTR(-ESTALE); | ||
166 | |||
161 | dout("__cfh_to_dentry %llx (%llx/%x)\n", | 167 | dout("__cfh_to_dentry %llx (%llx/%x)\n", |
162 | cfh->ino, cfh->parent_ino, cfh->parent_name_hash); | 168 | cfh->ino, cfh->parent_ino, cfh->parent_name_hash); |
163 | 169 | ||
@@ -207,9 +213,11 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid, | |||
207 | int fh_len, int fh_type) | 213 | int fh_len, int fh_type) |
208 | { | 214 | { |
209 | if (fh_type == 1) | 215 | if (fh_type == 1) |
210 | return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw); | 216 | return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw, |
217 | fh_len); | ||
211 | else | 218 | else |
212 | return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw); | 219 | return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw, |
220 | fh_len); | ||
213 | } | 221 | } |
214 | 222 | ||
215 | /* | 223 | /* |
@@ -230,6 +238,8 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, | |||
230 | 238 | ||
231 | if (fh_type == 1) | 239 | if (fh_type == 1) |
232 | return ERR_PTR(-ESTALE); | 240 | return ERR_PTR(-ESTALE); |
241 | if (fh_len < sizeof(*cfh) / 4) | ||
242 | return ERR_PTR(-ESTALE); | ||
233 | 243 | ||
234 | pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino, | 244 | pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino, |
235 | cfh->parent_name_hash); | 245 | cfh->parent_name_hash); |
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index e622863b292f..086f381d6489 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c | |||
@@ -31,18 +31,18 @@ | |||
31 | 31 | ||
32 | /* create a new cifs key */ | 32 | /* create a new cifs key */ |
33 | static int | 33 | static int |
34 | cifs_spnego_key_instantiate(struct key *key, const void *data, size_t datalen) | 34 | cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep) |
35 | { | 35 | { |
36 | char *payload; | 36 | char *payload; |
37 | int ret; | 37 | int ret; |
38 | 38 | ||
39 | ret = -ENOMEM; | 39 | ret = -ENOMEM; |
40 | payload = kmalloc(datalen, GFP_KERNEL); | 40 | payload = kmalloc(prep->datalen, GFP_KERNEL); |
41 | if (!payload) | 41 | if (!payload) |
42 | goto error; | 42 | goto error; |
43 | 43 | ||
44 | /* attach the data */ | 44 | /* attach the data */ |
45 | memcpy(payload, data, datalen); | 45 | memcpy(payload, prep->data, prep->datalen); |
46 | key->payload.data = payload; | 46 | key->payload.data = payload; |
47 | ret = 0; | 47 | ret = 0; |
48 | 48 | ||
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 53cf2aabce87..71d5d0a5f6b2 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c | |||
@@ -203,6 +203,27 @@ cifs_strtoUTF16(__le16 *to, const char *from, int len, | |||
203 | int i; | 203 | int i; |
204 | wchar_t wchar_to; /* needed to quiet sparse */ | 204 | wchar_t wchar_to; /* needed to quiet sparse */ |
205 | 205 | ||
206 | /* special case for utf8 to handle no plane0 chars */ | ||
207 | if (!strcmp(codepage->charset, "utf8")) { | ||
208 | /* | ||
209 | * convert utf8 -> utf16, we assume we have enough space | ||
210 | * as caller should have assumed conversion does not overflow | ||
211 | * in destination len is length in wchar_t units (16bits) | ||
212 | */ | ||
213 | i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN, | ||
214 | (wchar_t *) to, len); | ||
215 | |||
216 | /* if success terminate and exit */ | ||
217 | if (i >= 0) | ||
218 | goto success; | ||
219 | /* | ||
220 | * if fails fall back to UCS encoding as this | ||
221 | * function should not return negative values | ||
222 | * currently can fail only if source contains | ||
223 | * invalid encoded characters | ||
224 | */ | ||
225 | } | ||
226 | |||
206 | for (i = 0; len && *from; i++, from += charlen, len -= charlen) { | 227 | for (i = 0; len && *from; i++, from += charlen, len -= charlen) { |
207 | charlen = codepage->char2uni(from, len, &wchar_to); | 228 | charlen = codepage->char2uni(from, len, &wchar_to); |
208 | if (charlen < 1) { | 229 | if (charlen < 1) { |
@@ -215,6 +236,7 @@ cifs_strtoUTF16(__le16 *to, const char *from, int len, | |||
215 | put_unaligned_le16(wchar_to, &to[i]); | 236 | put_unaligned_le16(wchar_to, &to[i]); |
216 | } | 237 | } |
217 | 238 | ||
239 | success: | ||
218 | put_unaligned_le16(0, &to[i]); | 240 | put_unaligned_le16(0, &to[i]); |
219 | return i; | 241 | return i; |
220 | } | 242 | } |
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 2ee5c54797fa..fc783e264420 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
@@ -167,17 +167,17 @@ static struct shrinker cifs_shrinker = { | |||
167 | }; | 167 | }; |
168 | 168 | ||
169 | static int | 169 | static int |
170 | cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen) | 170 | cifs_idmap_key_instantiate(struct key *key, struct key_preparsed_payload *prep) |
171 | { | 171 | { |
172 | char *payload; | 172 | char *payload; |
173 | 173 | ||
174 | payload = kmalloc(datalen, GFP_KERNEL); | 174 | payload = kmalloc(prep->datalen, GFP_KERNEL); |
175 | if (!payload) | 175 | if (!payload) |
176 | return -ENOMEM; | 176 | return -ENOMEM; |
177 | 177 | ||
178 | memcpy(payload, data, datalen); | 178 | memcpy(payload, prep->data, prep->datalen); |
179 | key->payload.data = payload; | 179 | key->payload.data = payload; |
180 | key->datalen = datalen; | 180 | key->datalen = prep->datalen; |
181 | return 0; | 181 | return 0; |
182 | } | 182 | } |
183 | 183 | ||
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 2fdbe08a7a23..5c670b998ffb 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -67,6 +67,7 @@ enum { | |||
67 | /* Mount options that take no arguments */ | 67 | /* Mount options that take no arguments */ |
68 | Opt_user_xattr, Opt_nouser_xattr, | 68 | Opt_user_xattr, Opt_nouser_xattr, |
69 | Opt_forceuid, Opt_noforceuid, | 69 | Opt_forceuid, Opt_noforceuid, |
70 | Opt_forcegid, Opt_noforcegid, | ||
70 | Opt_noblocksend, Opt_noautotune, | 71 | Opt_noblocksend, Opt_noautotune, |
71 | Opt_hard, Opt_soft, Opt_perm, Opt_noperm, | 72 | Opt_hard, Opt_soft, Opt_perm, Opt_noperm, |
72 | Opt_mapchars, Opt_nomapchars, Opt_sfu, | 73 | Opt_mapchars, Opt_nomapchars, Opt_sfu, |
@@ -117,6 +118,8 @@ static const match_table_t cifs_mount_option_tokens = { | |||
117 | { Opt_nouser_xattr, "nouser_xattr" }, | 118 | { Opt_nouser_xattr, "nouser_xattr" }, |
118 | { Opt_forceuid, "forceuid" }, | 119 | { Opt_forceuid, "forceuid" }, |
119 | { Opt_noforceuid, "noforceuid" }, | 120 | { Opt_noforceuid, "noforceuid" }, |
121 | { Opt_forcegid, "forcegid" }, | ||
122 | { Opt_noforcegid, "noforcegid" }, | ||
120 | { Opt_noblocksend, "noblocksend" }, | 123 | { Opt_noblocksend, "noblocksend" }, |
121 | { Opt_noautotune, "noautotune" }, | 124 | { Opt_noautotune, "noautotune" }, |
122 | { Opt_hard, "hard" }, | 125 | { Opt_hard, "hard" }, |
@@ -1195,6 +1198,12 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1195 | case Opt_noforceuid: | 1198 | case Opt_noforceuid: |
1196 | override_uid = 0; | 1199 | override_uid = 0; |
1197 | break; | 1200 | break; |
1201 | case Opt_forcegid: | ||
1202 | override_gid = 1; | ||
1203 | break; | ||
1204 | case Opt_noforcegid: | ||
1205 | override_gid = 0; | ||
1206 | break; | ||
1198 | case Opt_noblocksend: | 1207 | case Opt_noblocksend: |
1199 | vol->noblocksnd = 1; | 1208 | vol->noblocksnd = 1; |
1200 | break; | 1209 | break; |
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 2126ab185045..76d974c952fe 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -183,6 +183,12 @@ smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec, | |||
183 | rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec], | 183 | rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec], |
184 | n_vec - first_vec, remaining); | 184 | n_vec - first_vec, remaining); |
185 | if (rc == -ENOSPC || rc == -EAGAIN) { | 185 | if (rc == -ENOSPC || rc == -EAGAIN) { |
186 | /* | ||
187 | * Catch if a low level driver returns -ENOSPC. This | ||
188 | * WARN_ON will be removed by 3.10 if no one reports | ||
189 | * seeing this. | ||
190 | */ | ||
191 | WARN_ON_ONCE(rc == -ENOSPC); | ||
186 | i++; | 192 | i++; |
187 | if (i >= 14 || (!server->noblocksnd && (i > 2))) { | 193 | if (i >= 14 || (!server->noblocksnd && (i > 2))) { |
188 | cERROR(1, "sends on sock %p stuck for 15 " | 194 | cERROR(1, "sends on sock %p stuck for 15 " |
diff --git a/fs/compat.c b/fs/compat.c index b7a24d0ca30d..015e1e1f87c6 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -776,16 +776,16 @@ asmlinkage long compat_sys_mount(const char __user * dev_name, | |||
776 | char *kernel_type; | 776 | char *kernel_type; |
777 | unsigned long data_page; | 777 | unsigned long data_page; |
778 | char *kernel_dev; | 778 | char *kernel_dev; |
779 | char *dir_page; | 779 | struct filename *dir; |
780 | int retval; | 780 | int retval; |
781 | 781 | ||
782 | retval = copy_mount_string(type, &kernel_type); | 782 | retval = copy_mount_string(type, &kernel_type); |
783 | if (retval < 0) | 783 | if (retval < 0) |
784 | goto out; | 784 | goto out; |
785 | 785 | ||
786 | dir_page = getname(dir_name); | 786 | dir = getname(dir_name); |
787 | retval = PTR_ERR(dir_page); | 787 | retval = PTR_ERR(dir); |
788 | if (IS_ERR(dir_page)) | 788 | if (IS_ERR(dir)) |
789 | goto out1; | 789 | goto out1; |
790 | 790 | ||
791 | retval = copy_mount_string(dev_name, &kernel_dev); | 791 | retval = copy_mount_string(dev_name, &kernel_dev); |
@@ -807,7 +807,7 @@ asmlinkage long compat_sys_mount(const char __user * dev_name, | |||
807 | } | 807 | } |
808 | } | 808 | } |
809 | 809 | ||
810 | retval = do_mount(kernel_dev, dir_page, kernel_type, | 810 | retval = do_mount(kernel_dev, dir->name, kernel_type, |
811 | flags, (void*)data_page); | 811 | flags, (void*)data_page); |
812 | 812 | ||
813 | out4: | 813 | out4: |
@@ -815,7 +815,7 @@ asmlinkage long compat_sys_mount(const char __user * dev_name, | |||
815 | out3: | 815 | out3: |
816 | kfree(kernel_dev); | 816 | kfree(kernel_dev); |
817 | out2: | 817 | out2: |
818 | putname(dir_page); | 818 | putname(dir); |
819 | out1: | 819 | out1: |
820 | kfree(kernel_type); | 820 | kfree(kernel_type); |
821 | out: | 821 | out: |
diff --git a/fs/coredump.c b/fs/coredump.c index fd37facac8dc..ce47379bfa61 100644 --- a/fs/coredump.c +++ b/fs/coredump.c | |||
@@ -450,11 +450,12 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) | |||
450 | 450 | ||
451 | cp->file = files[1]; | 451 | cp->file = files[1]; |
452 | 452 | ||
453 | replace_fd(0, files[0], 0); | 453 | err = replace_fd(0, files[0], 0); |
454 | fput(files[0]); | ||
454 | /* and disallow core files too */ | 455 | /* and disallow core files too */ |
455 | current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; | 456 | current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; |
456 | 457 | ||
457 | return 0; | 458 | return err; |
458 | } | 459 | } |
459 | 460 | ||
460 | void do_coredump(siginfo_t *siginfo, struct pt_regs *regs) | 461 | void do_coredump(siginfo_t *siginfo, struct pt_regs *regs) |
@@ -59,7 +59,6 @@ | |||
59 | #include <asm/uaccess.h> | 59 | #include <asm/uaccess.h> |
60 | #include <asm/mmu_context.h> | 60 | #include <asm/mmu_context.h> |
61 | #include <asm/tlb.h> | 61 | #include <asm/tlb.h> |
62 | #include <asm/exec.h> | ||
63 | 62 | ||
64 | #include <trace/events/task.h> | 63 | #include <trace/events/task.h> |
65 | #include "internal.h" | 64 | #include "internal.h" |
@@ -106,7 +105,7 @@ static inline void put_binfmt(struct linux_binfmt * fmt) | |||
106 | SYSCALL_DEFINE1(uselib, const char __user *, library) | 105 | SYSCALL_DEFINE1(uselib, const char __user *, library) |
107 | { | 106 | { |
108 | struct file *file; | 107 | struct file *file; |
109 | char *tmp = getname(library); | 108 | struct filename *tmp = getname(library); |
110 | int error = PTR_ERR(tmp); | 109 | int error = PTR_ERR(tmp); |
111 | static const struct open_flags uselib_flags = { | 110 | static const struct open_flags uselib_flags = { |
112 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, | 111 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, |
@@ -392,7 +391,7 @@ struct user_arg_ptr { | |||
392 | union { | 391 | union { |
393 | const char __user *const __user *native; | 392 | const char __user *const __user *native; |
394 | #ifdef CONFIG_COMPAT | 393 | #ifdef CONFIG_COMPAT |
395 | compat_uptr_t __user *compat; | 394 | const compat_uptr_t __user *compat; |
396 | #endif | 395 | #endif |
397 | } ptr; | 396 | } ptr; |
398 | }; | 397 | }; |
@@ -752,13 +751,14 @@ struct file *open_exec(const char *name) | |||
752 | { | 751 | { |
753 | struct file *file; | 752 | struct file *file; |
754 | int err; | 753 | int err; |
754 | struct filename tmp = { .name = name }; | ||
755 | static const struct open_flags open_exec_flags = { | 755 | static const struct open_flags open_exec_flags = { |
756 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, | 756 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, |
757 | .acc_mode = MAY_EXEC | MAY_OPEN, | 757 | .acc_mode = MAY_EXEC | MAY_OPEN, |
758 | .intent = LOOKUP_OPEN | 758 | .intent = LOOKUP_OPEN |
759 | }; | 759 | }; |
760 | 760 | ||
761 | file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW); | 761 | file = do_filp_open(AT_FDCWD, &tmp, &open_exec_flags, LOOKUP_FOLLOW); |
762 | if (IS_ERR(file)) | 762 | if (IS_ERR(file)) |
763 | goto out; | 763 | goto out; |
764 | 764 | ||
@@ -1574,9 +1574,9 @@ int do_execve(const char *filename, | |||
1574 | } | 1574 | } |
1575 | 1575 | ||
1576 | #ifdef CONFIG_COMPAT | 1576 | #ifdef CONFIG_COMPAT |
1577 | int compat_do_execve(char *filename, | 1577 | int compat_do_execve(const char *filename, |
1578 | compat_uptr_t __user *__argv, | 1578 | const compat_uptr_t __user *__argv, |
1579 | compat_uptr_t __user *__envp, | 1579 | const compat_uptr_t __user *__envp, |
1580 | struct pt_regs *regs) | 1580 | struct pt_regs *regs) |
1581 | { | 1581 | { |
1582 | struct user_arg_ptr argv = { | 1582 | struct user_arg_ptr argv = { |
@@ -1658,3 +1658,56 @@ int get_dumpable(struct mm_struct *mm) | |||
1658 | { | 1658 | { |
1659 | return __get_dumpable(mm->flags); | 1659 | return __get_dumpable(mm->flags); |
1660 | } | 1660 | } |
1661 | |||
1662 | #ifdef __ARCH_WANT_SYS_EXECVE | ||
1663 | SYSCALL_DEFINE3(execve, | ||
1664 | const char __user *, filename, | ||
1665 | const char __user *const __user *, argv, | ||
1666 | const char __user *const __user *, envp) | ||
1667 | { | ||
1668 | struct filename *path = getname(filename); | ||
1669 | int error = PTR_ERR(path); | ||
1670 | if (!IS_ERR(path)) { | ||
1671 | error = do_execve(path->name, argv, envp, current_pt_regs()); | ||
1672 | putname(path); | ||
1673 | } | ||
1674 | return error; | ||
1675 | } | ||
1676 | #ifdef CONFIG_COMPAT | ||
1677 | asmlinkage long compat_sys_execve(const char __user * filename, | ||
1678 | const compat_uptr_t __user * argv, | ||
1679 | const compat_uptr_t __user * envp) | ||
1680 | { | ||
1681 | struct filename *path = getname(filename); | ||
1682 | int error = PTR_ERR(path); | ||
1683 | if (!IS_ERR(path)) { | ||
1684 | error = compat_do_execve(path->name, argv, envp, | ||
1685 | current_pt_regs()); | ||
1686 | putname(path); | ||
1687 | } | ||
1688 | return error; | ||
1689 | } | ||
1690 | #endif | ||
1691 | #endif | ||
1692 | |||
1693 | #ifdef __ARCH_WANT_KERNEL_EXECVE | ||
1694 | int kernel_execve(const char *filename, | ||
1695 | const char *const argv[], | ||
1696 | const char *const envp[]) | ||
1697 | { | ||
1698 | struct pt_regs *p = current_pt_regs(); | ||
1699 | int ret; | ||
1700 | |||
1701 | ret = do_execve(filename, | ||
1702 | (const char __user *const __user *)argv, | ||
1703 | (const char __user *const __user *)envp, p); | ||
1704 | if (ret < 0) | ||
1705 | return ret; | ||
1706 | |||
1707 | /* | ||
1708 | * We were successful. We won't be returning to our caller, but | ||
1709 | * instead to user space by manipulating the kernel stack. | ||
1710 | */ | ||
1711 | ret_from_kernel_execve(p); | ||
1712 | } | ||
1713 | #endif | ||
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 1585db1aa365..f936cb50dc0d 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c | |||
@@ -814,8 +814,8 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) | |||
814 | struct bio *bio; | 814 | struct bio *bio; |
815 | 815 | ||
816 | if (per_dev != master_dev) { | 816 | if (per_dev != master_dev) { |
817 | bio = bio_kmalloc(GFP_KERNEL, | 817 | bio = bio_clone_kmalloc(master_dev->bio, |
818 | master_dev->bio->bi_max_vecs); | 818 | GFP_KERNEL); |
819 | if (unlikely(!bio)) { | 819 | if (unlikely(!bio)) { |
820 | ORE_DBGMSG( | 820 | ORE_DBGMSG( |
821 | "Failed to allocate BIO size=%u\n", | 821 | "Failed to allocate BIO size=%u\n", |
@@ -824,7 +824,6 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) | |||
824 | goto out; | 824 | goto out; |
825 | } | 825 | } |
826 | 826 | ||
827 | __bio_clone(bio, master_dev->bio); | ||
828 | bio->bi_bdev = NULL; | 827 | bio->bi_bdev = NULL; |
829 | bio->bi_next = NULL; | 828 | bio->bi_next = NULL; |
830 | per_dev->offset = master_dev->offset; | 829 | per_dev->offset = master_dev->offset; |
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 59e3bbfac0b1..5e59280d42d7 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -389,8 +389,6 @@ static int exofs_sync_fs(struct super_block *sb, int wait) | |||
389 | if (unlikely(ret)) | 389 | if (unlikely(ret)) |
390 | goto out; | 390 | goto out; |
391 | 391 | ||
392 | lock_super(sb); | ||
393 | |||
394 | ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); | 392 | ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); |
395 | memset(fscb, 0, ios->length); | 393 | memset(fscb, 0, ios->length); |
396 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); | 394 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); |
@@ -406,8 +404,6 @@ static int exofs_sync_fs(struct super_block *sb, int wait) | |||
406 | if (unlikely(ret)) | 404 | if (unlikely(ret)) |
407 | EXOFS_ERR("%s: ore_write failed.\n", __func__); | 405 | EXOFS_ERR("%s: ore_write failed.\n", __func__); |
408 | 406 | ||
409 | |||
410 | unlock_super(sb); | ||
411 | out: | 407 | out: |
412 | EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); | 408 | EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); |
413 | ore_put_io_state(ios); | 409 | ore_put_io_state(ios); |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index ebf8312c3a4e..5366393528df 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -2578,11 +2578,9 @@ out: | |||
2578 | static int ext3_unfreeze(struct super_block *sb) | 2578 | static int ext3_unfreeze(struct super_block *sb) |
2579 | { | 2579 | { |
2580 | if (!(sb->s_flags & MS_RDONLY)) { | 2580 | if (!(sb->s_flags & MS_RDONLY)) { |
2581 | lock_super(sb); | ||
2582 | /* Reser the needs_recovery flag before the fs is unlocked. */ | 2581 | /* Reser the needs_recovery flag before the fs is unlocked. */ |
2583 | EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); | 2582 | EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); |
2584 | ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); | 2583 | ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); |
2585 | unlock_super(sb); | ||
2586 | journal_unlock_updates(EXT3_SB(sb)->s_journal); | 2584 | journal_unlock_updates(EXT3_SB(sb)->s_journal); |
2587 | } | 2585 | } |
2588 | return 0; | 2586 | return 0; |
@@ -2602,7 +2600,6 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
2602 | #endif | 2600 | #endif |
2603 | 2601 | ||
2604 | /* Store the original options */ | 2602 | /* Store the original options */ |
2605 | lock_super(sb); | ||
2606 | old_sb_flags = sb->s_flags; | 2603 | old_sb_flags = sb->s_flags; |
2607 | old_opts.s_mount_opt = sbi->s_mount_opt; | 2604 | old_opts.s_mount_opt = sbi->s_mount_opt; |
2608 | old_opts.s_resuid = sbi->s_resuid; | 2605 | old_opts.s_resuid = sbi->s_resuid; |
@@ -2708,8 +2705,6 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
2708 | old_opts.s_qf_names[i] != sbi->s_qf_names[i]) | 2705 | old_opts.s_qf_names[i] != sbi->s_qf_names[i]) |
2709 | kfree(old_opts.s_qf_names[i]); | 2706 | kfree(old_opts.s_qf_names[i]); |
2710 | #endif | 2707 | #endif |
2711 | unlock_super(sb); | ||
2712 | |||
2713 | if (enable_quota) | 2708 | if (enable_quota) |
2714 | dquot_resume(sb, -1); | 2709 | dquot_resume(sb, -1); |
2715 | return 0; | 2710 | return 0; |
@@ -2728,7 +2723,6 @@ restore_opts: | |||
2728 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; | 2723 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; |
2729 | } | 2724 | } |
2730 | #endif | 2725 | #endif |
2731 | unlock_super(sb); | ||
2732 | return err; | 2726 | return err; |
2733 | } | 2727 | } |
2734 | 2728 | ||
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index bca6d0a1255e..2a182342442e 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
@@ -571,7 +571,7 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, | |||
571 | int short_len = 0, fill_len = 0; | 571 | int short_len = 0, fill_len = 0; |
572 | int ret = 0; | 572 | int ret = 0; |
573 | 573 | ||
574 | lock_super(sb); | 574 | mutex_lock(&sbi->s_lock); |
575 | 575 | ||
576 | cpos = filp->f_pos; | 576 | cpos = filp->f_pos; |
577 | /* Fake . and .. for the root directory. */ | 577 | /* Fake . and .. for the root directory. */ |
@@ -693,7 +693,7 @@ fill_failed: | |||
693 | if (unicode) | 693 | if (unicode) |
694 | __putname(unicode); | 694 | __putname(unicode); |
695 | out: | 695 | out: |
696 | unlock_super(sb); | 696 | mutex_unlock(&sbi->s_lock); |
697 | return ret; | 697 | return ret; |
698 | } | 698 | } |
699 | 699 | ||
diff --git a/fs/fat/fat.h b/fs/fat/fat.h index ca7e8f8bad7c..623f36f0423b 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h | |||
@@ -71,8 +71,9 @@ struct msdos_sb_info { | |||
71 | unsigned long root_cluster; /* first cluster of the root directory */ | 71 | unsigned long root_cluster; /* first cluster of the root directory */ |
72 | unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ | 72 | unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ |
73 | struct mutex fat_lock; | 73 | struct mutex fat_lock; |
74 | unsigned int prev_free; /* previously allocated cluster number */ | 74 | struct mutex s_lock; |
75 | unsigned int free_clusters; /* -1 if undefined */ | 75 | unsigned int prev_free; /* previously allocated cluster number */ |
76 | unsigned int free_clusters; /* -1 if undefined */ | ||
76 | unsigned int free_clus_valid; /* is free_clusters valid? */ | 77 | unsigned int free_clus_valid; /* is free_clusters valid? */ |
77 | struct fat_mount_options options; | 78 | struct fat_mount_options options; |
78 | struct nls_table *nls_disk; /* Codepage used on disk */ | 79 | struct nls_table *nls_disk; /* Codepage used on disk */ |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 76f60c642c06..5bafaad00530 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -673,9 +673,9 @@ static int fat_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
673 | if (inode->i_ino == MSDOS_FSINFO_INO) { | 673 | if (inode->i_ino == MSDOS_FSINFO_INO) { |
674 | struct super_block *sb = inode->i_sb; | 674 | struct super_block *sb = inode->i_sb; |
675 | 675 | ||
676 | lock_super(sb); | 676 | mutex_lock(&MSDOS_SB(sb)->s_lock); |
677 | err = fat_clusters_flush(sb); | 677 | err = fat_clusters_flush(sb); |
678 | unlock_super(sb); | 678 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
679 | } else | 679 | } else |
680 | err = __fat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); | 680 | err = __fat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); |
681 | 681 | ||
@@ -1268,6 +1268,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, | |||
1268 | b = (struct fat_boot_sector *) bh->b_data; | 1268 | b = (struct fat_boot_sector *) bh->b_data; |
1269 | } | 1269 | } |
1270 | 1270 | ||
1271 | mutex_init(&sbi->s_lock); | ||
1271 | sbi->cluster_size = sb->s_blocksize * sbi->sec_per_clus; | 1272 | sbi->cluster_size = sb->s_blocksize * sbi->sec_per_clus; |
1272 | sbi->cluster_bits = ffs(sbi->cluster_size) - 1; | 1273 | sbi->cluster_bits = ffs(sbi->cluster_size) - 1; |
1273 | sbi->fats = b->fats; | 1274 | sbi->fats = b->fats; |
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index c1055e778fff..e2cfda94a28d 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c | |||
@@ -208,7 +208,7 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, | |||
208 | struct inode *inode; | 208 | struct inode *inode; |
209 | int err; | 209 | int err; |
210 | 210 | ||
211 | lock_super(sb); | 211 | mutex_lock(&MSDOS_SB(sb)->s_lock); |
212 | err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); | 212 | err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); |
213 | switch (err) { | 213 | switch (err) { |
214 | case -ENOENT: | 214 | case -ENOENT: |
@@ -221,7 +221,7 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, | |||
221 | default: | 221 | default: |
222 | inode = ERR_PTR(err); | 222 | inode = ERR_PTR(err); |
223 | } | 223 | } |
224 | unlock_super(sb); | 224 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
225 | return d_splice_alias(inode, dentry); | 225 | return d_splice_alias(inode, dentry); |
226 | } | 226 | } |
227 | 227 | ||
@@ -273,7 +273,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
273 | unsigned char msdos_name[MSDOS_NAME]; | 273 | unsigned char msdos_name[MSDOS_NAME]; |
274 | int err, is_hid; | 274 | int err, is_hid; |
275 | 275 | ||
276 | lock_super(sb); | 276 | mutex_lock(&MSDOS_SB(sb)->s_lock); |
277 | 277 | ||
278 | err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, | 278 | err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, |
279 | msdos_name, &MSDOS_SB(sb)->options); | 279 | msdos_name, &MSDOS_SB(sb)->options); |
@@ -302,7 +302,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
302 | 302 | ||
303 | d_instantiate(dentry, inode); | 303 | d_instantiate(dentry, inode); |
304 | out: | 304 | out: |
305 | unlock_super(sb); | 305 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
306 | if (!err) | 306 | if (!err) |
307 | err = fat_flush_inodes(sb, dir, inode); | 307 | err = fat_flush_inodes(sb, dir, inode); |
308 | return err; | 308 | return err; |
@@ -316,7 +316,7 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) | |||
316 | struct fat_slot_info sinfo; | 316 | struct fat_slot_info sinfo; |
317 | int err; | 317 | int err; |
318 | 318 | ||
319 | lock_super(sb); | 319 | mutex_lock(&MSDOS_SB(sb)->s_lock); |
320 | /* | 320 | /* |
321 | * Check whether the directory is not in use, then check | 321 | * Check whether the directory is not in use, then check |
322 | * whether it is empty. | 322 | * whether it is empty. |
@@ -337,7 +337,7 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) | |||
337 | inode->i_ctime = CURRENT_TIME_SEC; | 337 | inode->i_ctime = CURRENT_TIME_SEC; |
338 | fat_detach(inode); | 338 | fat_detach(inode); |
339 | out: | 339 | out: |
340 | unlock_super(sb); | 340 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
341 | if (!err) | 341 | if (!err) |
342 | err = fat_flush_inodes(sb, dir, inode); | 342 | err = fat_flush_inodes(sb, dir, inode); |
343 | 343 | ||
@@ -354,7 +354,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
354 | struct timespec ts; | 354 | struct timespec ts; |
355 | int err, is_hid, cluster; | 355 | int err, is_hid, cluster; |
356 | 356 | ||
357 | lock_super(sb); | 357 | mutex_lock(&MSDOS_SB(sb)->s_lock); |
358 | 358 | ||
359 | err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, | 359 | err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, |
360 | msdos_name, &MSDOS_SB(sb)->options); | 360 | msdos_name, &MSDOS_SB(sb)->options); |
@@ -392,14 +392,14 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
392 | 392 | ||
393 | d_instantiate(dentry, inode); | 393 | d_instantiate(dentry, inode); |
394 | 394 | ||
395 | unlock_super(sb); | 395 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
396 | fat_flush_inodes(sb, dir, inode); | 396 | fat_flush_inodes(sb, dir, inode); |
397 | return 0; | 397 | return 0; |
398 | 398 | ||
399 | out_free: | 399 | out_free: |
400 | fat_free_clusters(dir, cluster); | 400 | fat_free_clusters(dir, cluster); |
401 | out: | 401 | out: |
402 | unlock_super(sb); | 402 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
403 | return err; | 403 | return err; |
404 | } | 404 | } |
405 | 405 | ||
@@ -411,7 +411,7 @@ static int msdos_unlink(struct inode *dir, struct dentry *dentry) | |||
411 | struct fat_slot_info sinfo; | 411 | struct fat_slot_info sinfo; |
412 | int err; | 412 | int err; |
413 | 413 | ||
414 | lock_super(sb); | 414 | mutex_lock(&MSDOS_SB(sb)->s_lock); |
415 | err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); | 415 | err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); |
416 | if (err) | 416 | if (err) |
417 | goto out; | 417 | goto out; |
@@ -423,7 +423,7 @@ static int msdos_unlink(struct inode *dir, struct dentry *dentry) | |||
423 | inode->i_ctime = CURRENT_TIME_SEC; | 423 | inode->i_ctime = CURRENT_TIME_SEC; |
424 | fat_detach(inode); | 424 | fat_detach(inode); |
425 | out: | 425 | out: |
426 | unlock_super(sb); | 426 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
427 | if (!err) | 427 | if (!err) |
428 | err = fat_flush_inodes(sb, dir, inode); | 428 | err = fat_flush_inodes(sb, dir, inode); |
429 | 429 | ||
@@ -606,7 +606,7 @@ static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
606 | unsigned char old_msdos_name[MSDOS_NAME], new_msdos_name[MSDOS_NAME]; | 606 | unsigned char old_msdos_name[MSDOS_NAME], new_msdos_name[MSDOS_NAME]; |
607 | int err, is_hid; | 607 | int err, is_hid; |
608 | 608 | ||
609 | lock_super(sb); | 609 | mutex_lock(&MSDOS_SB(sb)->s_lock); |
610 | 610 | ||
611 | err = msdos_format_name(old_dentry->d_name.name, | 611 | err = msdos_format_name(old_dentry->d_name.name, |
612 | old_dentry->d_name.len, old_msdos_name, | 612 | old_dentry->d_name.len, old_msdos_name, |
@@ -625,7 +625,7 @@ static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
625 | err = do_msdos_rename(old_dir, old_msdos_name, old_dentry, | 625 | err = do_msdos_rename(old_dir, old_msdos_name, old_dentry, |
626 | new_dir, new_msdos_name, new_dentry, is_hid); | 626 | new_dir, new_msdos_name, new_dentry, is_hid); |
627 | out: | 627 | out: |
628 | unlock_super(sb); | 628 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
629 | if (!err) | 629 | if (!err) |
630 | err = fat_flush_inodes(sb, old_dir, new_dir); | 630 | err = fat_flush_inodes(sb, old_dir, new_dir); |
631 | return err; | 631 | return err; |
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index e535dd75b986..ac959d655e7d 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
@@ -721,7 +721,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, | |||
721 | struct dentry *alias; | 721 | struct dentry *alias; |
722 | int err; | 722 | int err; |
723 | 723 | ||
724 | lock_super(sb); | 724 | mutex_lock(&MSDOS_SB(sb)->s_lock); |
725 | 725 | ||
726 | err = vfat_find(dir, &dentry->d_name, &sinfo); | 726 | err = vfat_find(dir, &dentry->d_name, &sinfo); |
727 | if (err) { | 727 | if (err) { |
@@ -752,13 +752,13 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, | |||
752 | if (!S_ISDIR(inode->i_mode)) | 752 | if (!S_ISDIR(inode->i_mode)) |
753 | d_move(alias, dentry); | 753 | d_move(alias, dentry); |
754 | iput(inode); | 754 | iput(inode); |
755 | unlock_super(sb); | 755 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
756 | return alias; | 756 | return alias; |
757 | } else | 757 | } else |
758 | dput(alias); | 758 | dput(alias); |
759 | 759 | ||
760 | out: | 760 | out: |
761 | unlock_super(sb); | 761 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
762 | dentry->d_time = dentry->d_parent->d_inode->i_version; | 762 | dentry->d_time = dentry->d_parent->d_inode->i_version; |
763 | dentry = d_splice_alias(inode, dentry); | 763 | dentry = d_splice_alias(inode, dentry); |
764 | if (dentry) | 764 | if (dentry) |
@@ -766,7 +766,7 @@ out: | |||
766 | return dentry; | 766 | return dentry; |
767 | 767 | ||
768 | error: | 768 | error: |
769 | unlock_super(sb); | 769 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
770 | return ERR_PTR(err); | 770 | return ERR_PTR(err); |
771 | } | 771 | } |
772 | 772 | ||
@@ -779,7 +779,7 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
779 | struct timespec ts; | 779 | struct timespec ts; |
780 | int err; | 780 | int err; |
781 | 781 | ||
782 | lock_super(sb); | 782 | mutex_lock(&MSDOS_SB(sb)->s_lock); |
783 | 783 | ||
784 | ts = CURRENT_TIME_SEC; | 784 | ts = CURRENT_TIME_SEC; |
785 | err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo); | 785 | err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo); |
@@ -800,7 +800,7 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
800 | dentry->d_time = dentry->d_parent->d_inode->i_version; | 800 | dentry->d_time = dentry->d_parent->d_inode->i_version; |
801 | d_instantiate(dentry, inode); | 801 | d_instantiate(dentry, inode); |
802 | out: | 802 | out: |
803 | unlock_super(sb); | 803 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
804 | return err; | 804 | return err; |
805 | } | 805 | } |
806 | 806 | ||
@@ -811,7 +811,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) | |||
811 | struct fat_slot_info sinfo; | 811 | struct fat_slot_info sinfo; |
812 | int err; | 812 | int err; |
813 | 813 | ||
814 | lock_super(sb); | 814 | mutex_lock(&MSDOS_SB(sb)->s_lock); |
815 | 815 | ||
816 | err = fat_dir_empty(inode); | 816 | err = fat_dir_empty(inode); |
817 | if (err) | 817 | if (err) |
@@ -829,7 +829,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) | |||
829 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; | 829 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; |
830 | fat_detach(inode); | 830 | fat_detach(inode); |
831 | out: | 831 | out: |
832 | unlock_super(sb); | 832 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
833 | 833 | ||
834 | return err; | 834 | return err; |
835 | } | 835 | } |
@@ -841,7 +841,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry) | |||
841 | struct fat_slot_info sinfo; | 841 | struct fat_slot_info sinfo; |
842 | int err; | 842 | int err; |
843 | 843 | ||
844 | lock_super(sb); | 844 | mutex_lock(&MSDOS_SB(sb)->s_lock); |
845 | 845 | ||
846 | err = vfat_find(dir, &dentry->d_name, &sinfo); | 846 | err = vfat_find(dir, &dentry->d_name, &sinfo); |
847 | if (err) | 847 | if (err) |
@@ -854,7 +854,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry) | |||
854 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; | 854 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; |
855 | fat_detach(inode); | 855 | fat_detach(inode); |
856 | out: | 856 | out: |
857 | unlock_super(sb); | 857 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
858 | 858 | ||
859 | return err; | 859 | return err; |
860 | } | 860 | } |
@@ -867,7 +867,7 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
867 | struct timespec ts; | 867 | struct timespec ts; |
868 | int err, cluster; | 868 | int err, cluster; |
869 | 869 | ||
870 | lock_super(sb); | 870 | mutex_lock(&MSDOS_SB(sb)->s_lock); |
871 | 871 | ||
872 | ts = CURRENT_TIME_SEC; | 872 | ts = CURRENT_TIME_SEC; |
873 | cluster = fat_alloc_new_dir(dir, &ts); | 873 | cluster = fat_alloc_new_dir(dir, &ts); |
@@ -896,13 +896,13 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
896 | dentry->d_time = dentry->d_parent->d_inode->i_version; | 896 | dentry->d_time = dentry->d_parent->d_inode->i_version; |
897 | d_instantiate(dentry, inode); | 897 | d_instantiate(dentry, inode); |
898 | 898 | ||
899 | unlock_super(sb); | 899 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
900 | return 0; | 900 | return 0; |
901 | 901 | ||
902 | out_free: | 902 | out_free: |
903 | fat_free_clusters(dir, cluster); | 903 | fat_free_clusters(dir, cluster); |
904 | out: | 904 | out: |
905 | unlock_super(sb); | 905 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
906 | return err; | 906 | return err; |
907 | } | 907 | } |
908 | 908 | ||
@@ -921,7 +921,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
921 | old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; | 921 | old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; |
922 | old_inode = old_dentry->d_inode; | 922 | old_inode = old_dentry->d_inode; |
923 | new_inode = new_dentry->d_inode; | 923 | new_inode = new_dentry->d_inode; |
924 | lock_super(sb); | 924 | mutex_lock(&MSDOS_SB(sb)->s_lock); |
925 | err = vfat_find(old_dir, &old_dentry->d_name, &old_sinfo); | 925 | err = vfat_find(old_dir, &old_dentry->d_name, &old_sinfo); |
926 | if (err) | 926 | if (err) |
927 | goto out; | 927 | goto out; |
@@ -996,7 +996,7 @@ out: | |||
996 | brelse(sinfo.bh); | 996 | brelse(sinfo.bh); |
997 | brelse(dotdot_bh); | 997 | brelse(dotdot_bh); |
998 | brelse(old_sinfo.bh); | 998 | brelse(old_sinfo.bh); |
999 | unlock_super(sb); | 999 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
1000 | 1000 | ||
1001 | return err; | 1001 | return err; |
1002 | 1002 | ||
@@ -922,6 +922,9 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) | |||
922 | if ((flags & ~O_CLOEXEC) != 0) | 922 | if ((flags & ~O_CLOEXEC) != 0) |
923 | return -EINVAL; | 923 | return -EINVAL; |
924 | 924 | ||
925 | if (unlikely(oldfd == newfd)) | ||
926 | return -EINVAL; | ||
927 | |||
925 | if (newfd >= rlimit(RLIMIT_NOFILE)) | 928 | if (newfd >= rlimit(RLIMIT_NOFILE)) |
926 | return -EMFILE; | 929 | return -EMFILE; |
927 | 930 | ||
diff --git a/fs/file_table.c b/fs/file_table.c index dac67923330f..a72bf9ddd0d2 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -36,7 +36,7 @@ struct files_stat_struct files_stat = { | |||
36 | .max_files = NR_FILE | 36 | .max_files = NR_FILE |
37 | }; | 37 | }; |
38 | 38 | ||
39 | DEFINE_LGLOCK(files_lglock); | 39 | DEFINE_STATIC_LGLOCK(files_lglock); |
40 | 40 | ||
41 | /* SLAB cache for file structures */ | 41 | /* SLAB cache for file structures */ |
42 | static struct kmem_cache *filp_cachep __read_mostly; | 42 | static struct kmem_cache *filp_cachep __read_mostly; |
diff --git a/fs/filesystems.c b/fs/filesystems.c index 96f24286667a..da165f6adcbf 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c | |||
@@ -124,7 +124,7 @@ EXPORT_SYMBOL(unregister_filesystem); | |||
124 | static int fs_index(const char __user * __name) | 124 | static int fs_index(const char __user * __name) |
125 | { | 125 | { |
126 | struct file_system_type * tmp; | 126 | struct file_system_type * tmp; |
127 | char * name; | 127 | struct filename *name; |
128 | int err, index; | 128 | int err, index; |
129 | 129 | ||
130 | name = getname(__name); | 130 | name = getname(__name); |
@@ -135,7 +135,7 @@ static int fs_index(const char __user * __name) | |||
135 | err = -EINVAL; | 135 | err = -EINVAL; |
136 | read_lock(&file_systems_lock); | 136 | read_lock(&file_systems_lock); |
137 | for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) { | 137 | for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) { |
138 | if (strcmp(tmp->name,name) == 0) { | 138 | if (strcmp(tmp->name, name->name) == 0) { |
139 | err = index; | 139 | err = index; |
140 | break; | 140 | break; |
141 | } | 141 | } |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 401b6c6248ae..51ea267d444c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -249,7 +249,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) | |||
249 | } | 249 | } |
250 | 250 | ||
251 | /* | 251 | /* |
252 | * Move expired (dirtied after work->older_than_this) dirty inodes from | 252 | * Move expired (dirtied before work->older_than_this) dirty inodes from |
253 | * @delaying_queue to @dispatch_queue. | 253 | * @delaying_queue to @dispatch_queue. |
254 | */ | 254 | */ |
255 | static int move_expired_inodes(struct list_head *delaying_queue, | 255 | static int move_expired_inodes(struct list_head *delaying_queue, |
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index e8ed6d4a6181..4767774a5f3e 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c | |||
@@ -161,6 +161,8 @@ static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, | |||
161 | case GFS2_SMALL_FH_SIZE: | 161 | case GFS2_SMALL_FH_SIZE: |
162 | case GFS2_LARGE_FH_SIZE: | 162 | case GFS2_LARGE_FH_SIZE: |
163 | case GFS2_OLD_FH_SIZE: | 163 | case GFS2_OLD_FH_SIZE: |
164 | if (fh_len < GFS2_SMALL_FH_SIZE) | ||
165 | return NULL; | ||
164 | this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; | 166 | this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; |
165 | this.no_formal_ino |= be32_to_cpu(fh[1]); | 167 | this.no_formal_ino |= be32_to_cpu(fh[1]); |
166 | this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; | 168 | this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; |
@@ -180,6 +182,8 @@ static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
180 | switch (fh_type) { | 182 | switch (fh_type) { |
181 | case GFS2_LARGE_FH_SIZE: | 183 | case GFS2_LARGE_FH_SIZE: |
182 | case GFS2_OLD_FH_SIZE: | 184 | case GFS2_OLD_FH_SIZE: |
185 | if (fh_len < GFS2_LARGE_FH_SIZE) | ||
186 | return NULL; | ||
183 | parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; | 187 | parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; |
184 | parent.no_formal_ino |= be32_to_cpu(fh[5]); | 188 | parent.no_formal_ino |= be32_to_cpu(fh[5]); |
185 | parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; | 189 | parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; |
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 1fe731337f07..9c88da0e855a 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef __UM_FS_HOSTFS | 1 | #ifndef __UM_FS_HOSTFS |
2 | #define __UM_FS_HOSTFS | 2 | #define __UM_FS_HOSTFS |
3 | 3 | ||
4 | #include "os.h" | 4 | #include <os.h> |
5 | 5 | ||
6 | /* | 6 | /* |
7 | * These are exactly the same definitions as in fs.h, but the names are | 7 | * These are exactly the same definitions as in fs.h, but the names are |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 6c9f3a9d5e21..457addc5c91f 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -16,8 +16,8 @@ | |||
16 | #include <linux/mount.h> | 16 | #include <linux/mount.h> |
17 | #include <linux/namei.h> | 17 | #include <linux/namei.h> |
18 | #include "hostfs.h" | 18 | #include "hostfs.h" |
19 | #include "init.h" | 19 | #include <init.h> |
20 | #include "kern.h" | 20 | #include <kern.h> |
21 | 21 | ||
22 | struct hostfs_inode_info { | 22 | struct hostfs_inode_info { |
23 | int fd; | 23 | int fd; |
@@ -848,9 +848,11 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
848 | attr->ia_size != i_size_read(inode)) { | 848 | attr->ia_size != i_size_read(inode)) { |
849 | int error; | 849 | int error; |
850 | 850 | ||
851 | error = vmtruncate(inode, attr->ia_size); | 851 | error = inode_newsize_ok(inode, attr->ia_size); |
852 | if (err) | 852 | if (error) |
853 | return err; | 853 | return error; |
854 | |||
855 | truncate_setsize(inode, attr->ia_size); | ||
854 | } | 856 | } |
855 | 857 | ||
856 | setattr_copy(inode, attr); | 858 | setattr_copy(inode, attr); |
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index a74ad0d371c2..67838f3aa20a 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <sys/types.h> | 15 | #include <sys/types.h> |
16 | #include <sys/vfs.h> | 16 | #include <sys/vfs.h> |
17 | #include "hostfs.h" | 17 | #include "hostfs.h" |
18 | #include "os.h" | ||
19 | #include <utime.h> | 18 | #include <utime.h> |
20 | 19 | ||
21 | static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p) | 20 | static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p) |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index bc28bf077a6a..a3076228523d 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
@@ -398,7 +398,6 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) | |||
398 | *flags |= MS_NOATIME; | 398 | *flags |= MS_NOATIME; |
399 | 399 | ||
400 | hpfs_lock(s); | 400 | hpfs_lock(s); |
401 | lock_super(s); | ||
402 | uid = sbi->sb_uid; gid = sbi->sb_gid; | 401 | uid = sbi->sb_uid; gid = sbi->sb_gid; |
403 | umask = 0777 & ~sbi->sb_mode; | 402 | umask = 0777 & ~sbi->sb_mode; |
404 | lowercase = sbi->sb_lowercase; | 403 | lowercase = sbi->sb_lowercase; |
@@ -431,12 +430,10 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) | |||
431 | 430 | ||
432 | replace_mount_options(s, new_opts); | 431 | replace_mount_options(s, new_opts); |
433 | 432 | ||
434 | unlock_super(s); | ||
435 | hpfs_unlock(s); | 433 | hpfs_unlock(s); |
436 | return 0; | 434 | return 0; |
437 | 435 | ||
438 | out_err: | 436 | out_err: |
439 | unlock_super(s); | ||
440 | hpfs_unlock(s); | 437 | hpfs_unlock(s); |
441 | kfree(new_opts); | 438 | kfree(new_opts); |
442 | return -EINVAL; | 439 | return -EINVAL; |
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index c1dffe47fde2..78f21f8dc2ec 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c | |||
@@ -18,7 +18,7 @@ | |||
18 | #include <linux/pid_namespace.h> | 18 | #include <linux/pid_namespace.h> |
19 | #include <linux/namei.h> | 19 | #include <linux/namei.h> |
20 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
21 | #include "os.h" | 21 | #include <os.h> |
22 | 22 | ||
23 | static struct inode *get_inode(struct super_block *, struct dentry *); | 23 | static struct inode *get_inode(struct super_block *, struct dentry *); |
24 | 24 | ||
@@ -674,7 +674,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) | |||
674 | 674 | ||
675 | if (!inode) { | 675 | if (!inode) { |
676 | dput(dentry); | 676 | dput(dentry); |
677 | return ERR_PTR(-ENOMEM); | 677 | return NULL; |
678 | } | 678 | } |
679 | 679 | ||
680 | if (S_ISDIR(dentry->d_inode->i_mode)) { | 680 | if (S_ISDIR(dentry->d_inode->i_mode)) { |
diff --git a/fs/internal.h b/fs/internal.h index 371bcc4b1697..916b7cbf3e3e 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -97,8 +97,8 @@ struct open_flags { | |||
97 | int acc_mode; | 97 | int acc_mode; |
98 | int intent; | 98 | int intent; |
99 | }; | 99 | }; |
100 | extern struct file *do_filp_open(int dfd, const char *pathname, | 100 | extern struct file *do_filp_open(int dfd, struct filename *pathname, |
101 | const struct open_flags *op, int lookup_flags); | 101 | const struct open_flags *op, int flags); |
102 | extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, | 102 | extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, |
103 | const char *, const struct open_flags *, int lookup_flags); | 103 | const char *, const struct open_flags *, int lookup_flags); |
104 | 104 | ||
diff --git a/fs/isofs/export.c b/fs/isofs/export.c index 1d3804492aa7..2b4f2358eadb 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c | |||
@@ -175,7 +175,7 @@ static struct dentry *isofs_fh_to_parent(struct super_block *sb, | |||
175 | { | 175 | { |
176 | struct isofs_fid *ifid = (struct isofs_fid *)fid; | 176 | struct isofs_fid *ifid = (struct isofs_fid *)fid; |
177 | 177 | ||
178 | if (fh_type != 2) | 178 | if (fh_len < 2 || fh_type != 2) |
179 | return NULL; | 179 | return NULL; |
180 | 180 | ||
181 | return isofs_export_iget(sb, | 181 | return isofs_export_iget(sb, |
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index ff487954cd96..d3d8799e2187 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c | |||
@@ -100,6 +100,10 @@ static int jffs2_sync_fs(struct super_block *sb, int wait) | |||
100 | { | 100 | { |
101 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); | 101 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); |
102 | 102 | ||
103 | #ifdef CONFIG_JFFS2_FS_WRITEBUFFER | ||
104 | cancel_delayed_work_sync(&c->wbuf_dwork); | ||
105 | #endif | ||
106 | |||
103 | mutex_lock(&c->alloc_sem); | 107 | mutex_lock(&c->alloc_sem); |
104 | jffs2_flush_wbuf_pad(c); | 108 | jffs2_flush_wbuf_pad(c); |
105 | mutex_unlock(&c->alloc_sem); | 109 | mutex_unlock(&c->alloc_sem); |
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 6f4529d3697f..a6597d60d76d 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c | |||
@@ -1044,10 +1044,10 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c, | |||
1044 | ops.datbuf = NULL; | 1044 | ops.datbuf = NULL; |
1045 | 1045 | ||
1046 | ret = mtd_read_oob(c->mtd, jeb->offset, &ops); | 1046 | ret = mtd_read_oob(c->mtd, jeb->offset, &ops); |
1047 | if (ret || ops.oobretlen != ops.ooblen) { | 1047 | if ((ret && !mtd_is_bitflip(ret)) || ops.oobretlen != ops.ooblen) { |
1048 | pr_err("cannot read OOB for EB at %08x, requested %zd bytes, read %zd bytes, error %d\n", | 1048 | pr_err("cannot read OOB for EB at %08x, requested %zd bytes, read %zd bytes, error %d\n", |
1049 | jeb->offset, ops.ooblen, ops.oobretlen, ret); | 1049 | jeb->offset, ops.ooblen, ops.oobretlen, ret); |
1050 | if (!ret) | 1050 | if (!ret || mtd_is_bitflip(ret)) |
1051 | ret = -EIO; | 1051 | ret = -EIO; |
1052 | return ret; | 1052 | return ret; |
1053 | } | 1053 | } |
@@ -1086,10 +1086,10 @@ int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c, | |||
1086 | ops.datbuf = NULL; | 1086 | ops.datbuf = NULL; |
1087 | 1087 | ||
1088 | ret = mtd_read_oob(c->mtd, jeb->offset, &ops); | 1088 | ret = mtd_read_oob(c->mtd, jeb->offset, &ops); |
1089 | if (ret || ops.oobretlen != ops.ooblen) { | 1089 | if ((ret && !mtd_is_bitflip(ret)) || ops.oobretlen != ops.ooblen) { |
1090 | pr_err("cannot read OOB for EB at %08x, requested %zd bytes, read %zd bytes, error %d\n", | 1090 | pr_err("cannot read OOB for EB at %08x, requested %zd bytes, read %zd bytes, error %d\n", |
1091 | jeb->offset, ops.ooblen, ops.oobretlen, ret); | 1091 | jeb->offset, ops.ooblen, ops.oobretlen, ret); |
1092 | if (!ret) | 1092 | if (!ret || mtd_is_bitflip(ret)) |
1093 | ret = -EIO; | 1093 | ret = -EIO; |
1094 | return ret; | 1094 | return ret; |
1095 | } | 1095 | } |
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 7ef14b3c5bee..e4fb3ba5a58a 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
@@ -7,7 +7,6 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/utsname.h> | ||
11 | #include <linux/kernel.h> | 10 | #include <linux/kernel.h> |
12 | #include <linux/ktime.h> | 11 | #include <linux/ktime.h> |
13 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
@@ -19,6 +18,8 @@ | |||
19 | 18 | ||
20 | #include <asm/unaligned.h> | 19 | #include <asm/unaligned.h> |
21 | 20 | ||
21 | #include "netns.h" | ||
22 | |||
22 | #define NLMDBG_FACILITY NLMDBG_MONITOR | 23 | #define NLMDBG_FACILITY NLMDBG_MONITOR |
23 | #define NSM_PROGRAM 100024 | 24 | #define NSM_PROGRAM 100024 |
24 | #define NSM_VERSION 1 | 25 | #define NSM_VERSION 1 |
@@ -40,6 +41,7 @@ struct nsm_args { | |||
40 | u32 proc; | 41 | u32 proc; |
41 | 42 | ||
42 | char *mon_name; | 43 | char *mon_name; |
44 | char *nodename; | ||
43 | }; | 45 | }; |
44 | 46 | ||
45 | struct nsm_res { | 47 | struct nsm_res { |
@@ -70,7 +72,7 @@ static struct rpc_clnt *nsm_create(struct net *net) | |||
70 | }; | 72 | }; |
71 | struct rpc_create_args args = { | 73 | struct rpc_create_args args = { |
72 | .net = net, | 74 | .net = net, |
73 | .protocol = XPRT_TRANSPORT_UDP, | 75 | .protocol = XPRT_TRANSPORT_TCP, |
74 | .address = (struct sockaddr *)&sin, | 76 | .address = (struct sockaddr *)&sin, |
75 | .addrsize = sizeof(sin), | 77 | .addrsize = sizeof(sin), |
76 | .servername = "rpc.statd", | 78 | .servername = "rpc.statd", |
@@ -83,10 +85,54 @@ static struct rpc_clnt *nsm_create(struct net *net) | |||
83 | return rpc_create(&args); | 85 | return rpc_create(&args); |
84 | } | 86 | } |
85 | 87 | ||
86 | static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, | 88 | static struct rpc_clnt *nsm_client_get(struct net *net) |
87 | struct net *net) | ||
88 | { | 89 | { |
90 | static DEFINE_MUTEX(nsm_create_mutex); | ||
89 | struct rpc_clnt *clnt; | 91 | struct rpc_clnt *clnt; |
92 | struct lockd_net *ln = net_generic(net, lockd_net_id); | ||
93 | |||
94 | spin_lock(&ln->nsm_clnt_lock); | ||
95 | if (ln->nsm_users) { | ||
96 | ln->nsm_users++; | ||
97 | clnt = ln->nsm_clnt; | ||
98 | spin_unlock(&ln->nsm_clnt_lock); | ||
99 | goto out; | ||
100 | } | ||
101 | spin_unlock(&ln->nsm_clnt_lock); | ||
102 | |||
103 | mutex_lock(&nsm_create_mutex); | ||
104 | clnt = nsm_create(net); | ||
105 | if (!IS_ERR(clnt)) { | ||
106 | ln->nsm_clnt = clnt; | ||
107 | smp_wmb(); | ||
108 | ln->nsm_users = 1; | ||
109 | } | ||
110 | mutex_unlock(&nsm_create_mutex); | ||
111 | out: | ||
112 | return clnt; | ||
113 | } | ||
114 | |||
115 | static void nsm_client_put(struct net *net) | ||
116 | { | ||
117 | struct lockd_net *ln = net_generic(net, lockd_net_id); | ||
118 | struct rpc_clnt *clnt = ln->nsm_clnt; | ||
119 | int shutdown = 0; | ||
120 | |||
121 | spin_lock(&ln->nsm_clnt_lock); | ||
122 | if (ln->nsm_users) { | ||
123 | if (--ln->nsm_users) | ||
124 | ln->nsm_clnt = NULL; | ||
125 | shutdown = !ln->nsm_users; | ||
126 | } | ||
127 | spin_unlock(&ln->nsm_clnt_lock); | ||
128 | |||
129 | if (shutdown) | ||
130 | rpc_shutdown_client(clnt); | ||
131 | } | ||
132 | |||
133 | static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, | ||
134 | struct rpc_clnt *clnt) | ||
135 | { | ||
90 | int status; | 136 | int status; |
91 | struct nsm_args args = { | 137 | struct nsm_args args = { |
92 | .priv = &nsm->sm_priv, | 138 | .priv = &nsm->sm_priv, |
@@ -94,31 +140,24 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, | |||
94 | .vers = 3, | 140 | .vers = 3, |
95 | .proc = NLMPROC_NSM_NOTIFY, | 141 | .proc = NLMPROC_NSM_NOTIFY, |
96 | .mon_name = nsm->sm_mon_name, | 142 | .mon_name = nsm->sm_mon_name, |
143 | .nodename = clnt->cl_nodename, | ||
97 | }; | 144 | }; |
98 | struct rpc_message msg = { | 145 | struct rpc_message msg = { |
99 | .rpc_argp = &args, | 146 | .rpc_argp = &args, |
100 | .rpc_resp = res, | 147 | .rpc_resp = res, |
101 | }; | 148 | }; |
102 | 149 | ||
103 | clnt = nsm_create(net); | 150 | BUG_ON(clnt == NULL); |
104 | if (IS_ERR(clnt)) { | ||
105 | status = PTR_ERR(clnt); | ||
106 | dprintk("lockd: failed to create NSM upcall transport, " | ||
107 | "status=%d\n", status); | ||
108 | goto out; | ||
109 | } | ||
110 | 151 | ||
111 | memset(res, 0, sizeof(*res)); | 152 | memset(res, 0, sizeof(*res)); |
112 | 153 | ||
113 | msg.rpc_proc = &clnt->cl_procinfo[proc]; | 154 | msg.rpc_proc = &clnt->cl_procinfo[proc]; |
114 | status = rpc_call_sync(clnt, &msg, 0); | 155 | status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); |
115 | if (status < 0) | 156 | if (status < 0) |
116 | dprintk("lockd: NSM upcall RPC failed, status=%d\n", | 157 | dprintk("lockd: NSM upcall RPC failed, status=%d\n", |
117 | status); | 158 | status); |
118 | else | 159 | else |
119 | status = 0; | 160 | status = 0; |
120 | rpc_shutdown_client(clnt); | ||
121 | out: | ||
122 | return status; | 161 | return status; |
123 | } | 162 | } |
124 | 163 | ||
@@ -138,6 +177,7 @@ int nsm_monitor(const struct nlm_host *host) | |||
138 | struct nsm_handle *nsm = host->h_nsmhandle; | 177 | struct nsm_handle *nsm = host->h_nsmhandle; |
139 | struct nsm_res res; | 178 | struct nsm_res res; |
140 | int status; | 179 | int status; |
180 | struct rpc_clnt *clnt; | ||
141 | 181 | ||
142 | dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name); | 182 | dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name); |
143 | 183 | ||
@@ -150,7 +190,15 @@ int nsm_monitor(const struct nlm_host *host) | |||
150 | */ | 190 | */ |
151 | nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; | 191 | nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; |
152 | 192 | ||
153 | status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host->net); | 193 | clnt = nsm_client_get(host->net); |
194 | if (IS_ERR(clnt)) { | ||
195 | status = PTR_ERR(clnt); | ||
196 | dprintk("lockd: failed to create NSM upcall transport, " | ||
197 | "status=%d, net=%p\n", status, host->net); | ||
198 | return status; | ||
199 | } | ||
200 | |||
201 | status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, clnt); | ||
154 | if (unlikely(res.status != 0)) | 202 | if (unlikely(res.status != 0)) |
155 | status = -EIO; | 203 | status = -EIO; |
156 | if (unlikely(status < 0)) { | 204 | if (unlikely(status < 0)) { |
@@ -182,9 +230,11 @@ void nsm_unmonitor(const struct nlm_host *host) | |||
182 | 230 | ||
183 | if (atomic_read(&nsm->sm_count) == 1 | 231 | if (atomic_read(&nsm->sm_count) == 1 |
184 | && nsm->sm_monitored && !nsm->sm_sticky) { | 232 | && nsm->sm_monitored && !nsm->sm_sticky) { |
233 | struct lockd_net *ln = net_generic(host->net, lockd_net_id); | ||
234 | |||
185 | dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); | 235 | dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); |
186 | 236 | ||
187 | status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host->net); | 237 | status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, ln->nsm_clnt); |
188 | if (res.status != 0) | 238 | if (res.status != 0) |
189 | status = -EIO; | 239 | status = -EIO; |
190 | if (status < 0) | 240 | if (status < 0) |
@@ -192,6 +242,8 @@ void nsm_unmonitor(const struct nlm_host *host) | |||
192 | nsm->sm_name); | 242 | nsm->sm_name); |
193 | else | 243 | else |
194 | nsm->sm_monitored = 0; | 244 | nsm->sm_monitored = 0; |
245 | |||
246 | nsm_client_put(host->net); | ||
195 | } | 247 | } |
196 | } | 248 | } |
197 | 249 | ||
@@ -430,7 +482,7 @@ static void encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp) | |||
430 | { | 482 | { |
431 | __be32 *p; | 483 | __be32 *p; |
432 | 484 | ||
433 | encode_nsm_string(xdr, utsname()->nodename); | 485 | encode_nsm_string(xdr, argp->nodename); |
434 | p = xdr_reserve_space(xdr, 4 + 4 + 4); | 486 | p = xdr_reserve_space(xdr, 4 + 4 + 4); |
435 | *p++ = cpu_to_be32(argp->prog); | 487 | *p++ = cpu_to_be32(argp->prog); |
436 | *p++ = cpu_to_be32(argp->vers); | 488 | *p++ = cpu_to_be32(argp->vers); |
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 4eee248ba96e..5010b55628b4 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h | |||
@@ -12,6 +12,10 @@ struct lockd_net { | |||
12 | struct delayed_work grace_period_end; | 12 | struct delayed_work grace_period_end; |
13 | struct lock_manager lockd_manager; | 13 | struct lock_manager lockd_manager; |
14 | struct list_head grace_list; | 14 | struct list_head grace_list; |
15 | |||
16 | spinlock_t nsm_clnt_lock; | ||
17 | unsigned int nsm_users; | ||
18 | struct rpc_clnt *nsm_clnt; | ||
15 | }; | 19 | }; |
16 | 20 | ||
17 | extern int lockd_net_id; | 21 | extern int lockd_net_id; |
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 31a63f87b806..a2aa97d45670 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
@@ -126,7 +126,7 @@ static void restart_grace(void) | |||
126 | static int | 126 | static int |
127 | lockd(void *vrqstp) | 127 | lockd(void *vrqstp) |
128 | { | 128 | { |
129 | int err = 0, preverr = 0; | 129 | int err = 0; |
130 | struct svc_rqst *rqstp = vrqstp; | 130 | struct svc_rqst *rqstp = vrqstp; |
131 | 131 | ||
132 | /* try_to_freeze() is called from svc_recv() */ | 132 | /* try_to_freeze() is called from svc_recv() */ |
@@ -165,21 +165,8 @@ lockd(void *vrqstp) | |||
165 | * recvfrom routine. | 165 | * recvfrom routine. |
166 | */ | 166 | */ |
167 | err = svc_recv(rqstp, timeout); | 167 | err = svc_recv(rqstp, timeout); |
168 | if (err == -EAGAIN || err == -EINTR) { | 168 | if (err == -EAGAIN || err == -EINTR) |
169 | preverr = err; | ||
170 | continue; | 169 | continue; |
171 | } | ||
172 | if (err < 0) { | ||
173 | if (err != preverr) { | ||
174 | printk(KERN_WARNING "%s: unexpected error " | ||
175 | "from svc_recv (%d)\n", __func__, err); | ||
176 | preverr = err; | ||
177 | } | ||
178 | schedule_timeout_interruptible(HZ); | ||
179 | continue; | ||
180 | } | ||
181 | preverr = err; | ||
182 | |||
183 | dprintk("lockd: request from %s\n", | 170 | dprintk("lockd: request from %s\n", |
184 | svc_print_addr(rqstp, buf, sizeof(buf))); | 171 | svc_print_addr(rqstp, buf, sizeof(buf))); |
185 | 172 | ||
@@ -596,6 +583,7 @@ static int lockd_init_net(struct net *net) | |||
596 | 583 | ||
597 | INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); | 584 | INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); |
598 | INIT_LIST_HEAD(&ln->grace_list); | 585 | INIT_LIST_HEAD(&ln->grace_list); |
586 | spin_lock_init(&ln->nsm_clnt_lock); | ||
599 | return 0; | 587 | return 0; |
600 | } | 588 | } |
601 | 589 | ||
diff --git a/fs/locks.c b/fs/locks.c index abc7dc6c490b..a94e331a52a2 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -1289,7 +1289,7 @@ EXPORT_SYMBOL(__break_lease); | |||
1289 | void lease_get_mtime(struct inode *inode, struct timespec *time) | 1289 | void lease_get_mtime(struct inode *inode, struct timespec *time) |
1290 | { | 1290 | { |
1291 | struct file_lock *flock = inode->i_flock; | 1291 | struct file_lock *flock = inode->i_flock; |
1292 | if (flock && IS_LEASE(flock) && (flock->fl_type & F_WRLCK)) | 1292 | if (flock && IS_LEASE(flock) && (flock->fl_type == F_WRLCK)) |
1293 | *time = current_fs_time(inode->i_sb); | 1293 | *time = current_fs_time(inode->i_sb); |
1294 | else | 1294 | else |
1295 | *time = inode->i_mtime; | 1295 | *time = inode->i_mtime; |
@@ -2185,8 +2185,8 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, | |||
2185 | } else { | 2185 | } else { |
2186 | seq_printf(f, "%s ", | 2186 | seq_printf(f, "%s ", |
2187 | (lease_breaking(fl)) | 2187 | (lease_breaking(fl)) |
2188 | ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ " | 2188 | ? (fl->fl_type == F_UNLCK) ? "UNLCK" : "READ " |
2189 | : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ "); | 2189 | : (fl->fl_type == F_WRLCK) ? "WRITE" : "READ "); |
2190 | } | 2190 | } |
2191 | if (inode) { | 2191 | if (inode) { |
2192 | #ifdef WE_CAN_BREAK_LSLK_NOW | 2192 | #ifdef WE_CAN_BREAK_LSLK_NOW |
diff --git a/fs/namei.c b/fs/namei.c index aa30d19e9edd..d1895f308156 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -117,18 +117,70 @@ | |||
117 | * POSIX.1 2.4: an empty pathname is invalid (ENOENT). | 117 | * POSIX.1 2.4: an empty pathname is invalid (ENOENT). |
118 | * PATH_MAX includes the nul terminator --RR. | 118 | * PATH_MAX includes the nul terminator --RR. |
119 | */ | 119 | */ |
120 | static char *getname_flags(const char __user *filename, int flags, int *empty) | 120 | void final_putname(struct filename *name) |
121 | { | 121 | { |
122 | char *result = __getname(), *err; | 122 | if (name->separate) { |
123 | __putname(name->name); | ||
124 | kfree(name); | ||
125 | } else { | ||
126 | __putname(name); | ||
127 | } | ||
128 | } | ||
129 | |||
130 | #define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) | ||
131 | |||
132 | static struct filename * | ||
133 | getname_flags(const char __user *filename, int flags, int *empty) | ||
134 | { | ||
135 | struct filename *result, *err; | ||
123 | int len; | 136 | int len; |
137 | long max; | ||
138 | char *kname; | ||
124 | 139 | ||
140 | result = audit_reusename(filename); | ||
141 | if (result) | ||
142 | return result; | ||
143 | |||
144 | result = __getname(); | ||
125 | if (unlikely(!result)) | 145 | if (unlikely(!result)) |
126 | return ERR_PTR(-ENOMEM); | 146 | return ERR_PTR(-ENOMEM); |
127 | 147 | ||
128 | len = strncpy_from_user(result, filename, PATH_MAX); | 148 | /* |
129 | err = ERR_PTR(len); | 149 | * First, try to embed the struct filename inside the names_cache |
130 | if (unlikely(len < 0)) | 150 | * allocation |
151 | */ | ||
152 | kname = (char *)result + sizeof(*result); | ||
153 | result->name = kname; | ||
154 | result->separate = false; | ||
155 | max = EMBEDDED_NAME_MAX; | ||
156 | |||
157 | recopy: | ||
158 | len = strncpy_from_user(kname, filename, max); | ||
159 | if (unlikely(len < 0)) { | ||
160 | err = ERR_PTR(len); | ||
131 | goto error; | 161 | goto error; |
162 | } | ||
163 | |||
164 | /* | ||
165 | * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a | ||
166 | * separate struct filename so we can dedicate the entire | ||
167 | * names_cache allocation for the pathname, and re-do the copy from | ||
168 | * userland. | ||
169 | */ | ||
170 | if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) { | ||
171 | kname = (char *)result; | ||
172 | |||
173 | result = kzalloc(sizeof(*result), GFP_KERNEL); | ||
174 | if (!result) { | ||
175 | err = ERR_PTR(-ENOMEM); | ||
176 | result = (struct filename *)kname; | ||
177 | goto error; | ||
178 | } | ||
179 | result->name = kname; | ||
180 | result->separate = true; | ||
181 | max = PATH_MAX; | ||
182 | goto recopy; | ||
183 | } | ||
132 | 184 | ||
133 | /* The empty path is special. */ | 185 | /* The empty path is special. */ |
134 | if (unlikely(!len)) { | 186 | if (unlikely(!len)) { |
@@ -140,30 +192,32 @@ static char *getname_flags(const char __user *filename, int flags, int *empty) | |||
140 | } | 192 | } |
141 | 193 | ||
142 | err = ERR_PTR(-ENAMETOOLONG); | 194 | err = ERR_PTR(-ENAMETOOLONG); |
143 | if (likely(len < PATH_MAX)) { | 195 | if (unlikely(len >= PATH_MAX)) |
144 | audit_getname(result); | 196 | goto error; |
145 | return result; | 197 | |
146 | } | 198 | result->uptr = filename; |
199 | audit_getname(result); | ||
200 | return result; | ||
147 | 201 | ||
148 | error: | 202 | error: |
149 | __putname(result); | 203 | final_putname(result); |
150 | return err; | 204 | return err; |
151 | } | 205 | } |
152 | 206 | ||
153 | char *getname(const char __user * filename) | 207 | struct filename * |
208 | getname(const char __user * filename) | ||
154 | { | 209 | { |
155 | return getname_flags(filename, 0, NULL); | 210 | return getname_flags(filename, 0, NULL); |
156 | } | 211 | } |
212 | EXPORT_SYMBOL(getname); | ||
157 | 213 | ||
158 | #ifdef CONFIG_AUDITSYSCALL | 214 | #ifdef CONFIG_AUDITSYSCALL |
159 | void putname(const char *name) | 215 | void putname(struct filename *name) |
160 | { | 216 | { |
161 | if (unlikely(!audit_dummy_context())) | 217 | if (unlikely(!audit_dummy_context())) |
162 | audit_putname(name); | 218 | return audit_putname(name); |
163 | else | 219 | final_putname(name); |
164 | __putname(name); | ||
165 | } | 220 | } |
166 | EXPORT_SYMBOL(putname); | ||
167 | #endif | 221 | #endif |
168 | 222 | ||
169 | static int check_acl(struct inode *inode, int mask) | 223 | static int check_acl(struct inode *inode, int mask) |
@@ -692,9 +746,9 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd) | |||
692 | if (uid_eq(parent->i_uid, inode->i_uid)) | 746 | if (uid_eq(parent->i_uid, inode->i_uid)) |
693 | return 0; | 747 | return 0; |
694 | 748 | ||
749 | audit_log_link_denied("follow_link", link); | ||
695 | path_put_conditional(link, nd); | 750 | path_put_conditional(link, nd); |
696 | path_put(&nd->path); | 751 | path_put(&nd->path); |
697 | audit_log_link_denied("follow_link", link); | ||
698 | return -EACCES; | 752 | return -EACCES; |
699 | } | 753 | } |
700 | 754 | ||
@@ -810,6 +864,7 @@ follow_link(struct path *link, struct nameidata *nd, void **p) | |||
810 | return error; | 864 | return error; |
811 | 865 | ||
812 | out_put_nd_path: | 866 | out_put_nd_path: |
867 | *p = NULL; | ||
813 | path_put(&nd->path); | 868 | path_put(&nd->path); |
814 | path_put(link); | 869 | path_put(link); |
815 | return error; | 870 | return error; |
@@ -1962,24 +2017,29 @@ static int path_lookupat(int dfd, const char *name, | |||
1962 | return err; | 2017 | return err; |
1963 | } | 2018 | } |
1964 | 2019 | ||
1965 | static int do_path_lookup(int dfd, const char *name, | 2020 | static int filename_lookup(int dfd, struct filename *name, |
1966 | unsigned int flags, struct nameidata *nd) | 2021 | unsigned int flags, struct nameidata *nd) |
1967 | { | 2022 | { |
1968 | int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd); | 2023 | int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd); |
1969 | if (unlikely(retval == -ECHILD)) | 2024 | if (unlikely(retval == -ECHILD)) |
1970 | retval = path_lookupat(dfd, name, flags, nd); | 2025 | retval = path_lookupat(dfd, name->name, flags, nd); |
1971 | if (unlikely(retval == -ESTALE)) | 2026 | if (unlikely(retval == -ESTALE)) |
1972 | retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); | 2027 | retval = path_lookupat(dfd, name->name, |
2028 | flags | LOOKUP_REVAL, nd); | ||
1973 | 2029 | ||
1974 | if (likely(!retval)) { | 2030 | if (likely(!retval)) |
1975 | if (unlikely(!audit_dummy_context())) { | 2031 | audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); |
1976 | if (nd->path.dentry && nd->inode) | ||
1977 | audit_inode(name, nd->path.dentry); | ||
1978 | } | ||
1979 | } | ||
1980 | return retval; | 2032 | return retval; |
1981 | } | 2033 | } |
1982 | 2034 | ||
2035 | static int do_path_lookup(int dfd, const char *name, | ||
2036 | unsigned int flags, struct nameidata *nd) | ||
2037 | { | ||
2038 | struct filename filename = { .name = name }; | ||
2039 | |||
2040 | return filename_lookup(dfd, &filename, flags, nd); | ||
2041 | } | ||
2042 | |||
1983 | /* does lookup, returns the object with parent locked */ | 2043 | /* does lookup, returns the object with parent locked */ |
1984 | struct dentry *kern_path_locked(const char *name, struct path *path) | 2044 | struct dentry *kern_path_locked(const char *name, struct path *path) |
1985 | { | 2045 | { |
@@ -2097,13 +2157,13 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags, | |||
2097 | struct path *path, int *empty) | 2157 | struct path *path, int *empty) |
2098 | { | 2158 | { |
2099 | struct nameidata nd; | 2159 | struct nameidata nd; |
2100 | char *tmp = getname_flags(name, flags, empty); | 2160 | struct filename *tmp = getname_flags(name, flags, empty); |
2101 | int err = PTR_ERR(tmp); | 2161 | int err = PTR_ERR(tmp); |
2102 | if (!IS_ERR(tmp)) { | 2162 | if (!IS_ERR(tmp)) { |
2103 | 2163 | ||
2104 | BUG_ON(flags & LOOKUP_PARENT); | 2164 | BUG_ON(flags & LOOKUP_PARENT); |
2105 | 2165 | ||
2106 | err = do_path_lookup(dfd, tmp, flags, &nd); | 2166 | err = filename_lookup(dfd, tmp, flags, &nd); |
2107 | putname(tmp); | 2167 | putname(tmp); |
2108 | if (!err) | 2168 | if (!err) |
2109 | *path = nd.path; | 2169 | *path = nd.path; |
@@ -2117,22 +2177,28 @@ int user_path_at(int dfd, const char __user *name, unsigned flags, | |||
2117 | return user_path_at_empty(dfd, name, flags, path, NULL); | 2177 | return user_path_at_empty(dfd, name, flags, path, NULL); |
2118 | } | 2178 | } |
2119 | 2179 | ||
2120 | static int user_path_parent(int dfd, const char __user *path, | 2180 | /* |
2121 | struct nameidata *nd, char **name) | 2181 | * NB: most callers don't do anything directly with the reference to the |
2182 | * to struct filename, but the nd->last pointer points into the name string | ||
2183 | * allocated by getname. So we must hold the reference to it until all | ||
2184 | * path-walking is complete. | ||
2185 | */ | ||
2186 | static struct filename * | ||
2187 | user_path_parent(int dfd, const char __user *path, struct nameidata *nd) | ||
2122 | { | 2188 | { |
2123 | char *s = getname(path); | 2189 | struct filename *s = getname(path); |
2124 | int error; | 2190 | int error; |
2125 | 2191 | ||
2126 | if (IS_ERR(s)) | 2192 | if (IS_ERR(s)) |
2127 | return PTR_ERR(s); | 2193 | return s; |
2128 | 2194 | ||
2129 | error = do_path_lookup(dfd, s, LOOKUP_PARENT, nd); | 2195 | error = filename_lookup(dfd, s, LOOKUP_PARENT, nd); |
2130 | if (error) | 2196 | if (error) { |
2131 | putname(s); | 2197 | putname(s); |
2132 | else | 2198 | return ERR_PTR(error); |
2133 | *name = s; | 2199 | } |
2134 | 2200 | ||
2135 | return error; | 2201 | return s; |
2136 | } | 2202 | } |
2137 | 2203 | ||
2138 | /* | 2204 | /* |
@@ -2179,7 +2245,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir) | |||
2179 | return -ENOENT; | 2245 | return -ENOENT; |
2180 | 2246 | ||
2181 | BUG_ON(victim->d_parent->d_inode != dir); | 2247 | BUG_ON(victim->d_parent->d_inode != dir); |
2182 | audit_inode_child(victim, dir); | 2248 | audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); |
2183 | 2249 | ||
2184 | error = inode_permission(dir, MAY_WRITE | MAY_EXEC); | 2250 | error = inode_permission(dir, MAY_WRITE | MAY_EXEC); |
2185 | if (error) | 2251 | if (error) |
@@ -2624,7 +2690,7 @@ out_dput: | |||
2624 | */ | 2690 | */ |
2625 | static int do_last(struct nameidata *nd, struct path *path, | 2691 | static int do_last(struct nameidata *nd, struct path *path, |
2626 | struct file *file, const struct open_flags *op, | 2692 | struct file *file, const struct open_flags *op, |
2627 | int *opened, const char *pathname) | 2693 | int *opened, struct filename *name) |
2628 | { | 2694 | { |
2629 | struct dentry *dir = nd->path.dentry; | 2695 | struct dentry *dir = nd->path.dentry; |
2630 | int open_flag = op->open_flag; | 2696 | int open_flag = op->open_flag; |
@@ -2651,7 +2717,7 @@ static int do_last(struct nameidata *nd, struct path *path, | |||
2651 | error = complete_walk(nd); | 2717 | error = complete_walk(nd); |
2652 | if (error) | 2718 | if (error) |
2653 | return error; | 2719 | return error; |
2654 | audit_inode(pathname, nd->path.dentry); | 2720 | audit_inode(name, nd->path.dentry, 0); |
2655 | if (open_flag & O_CREAT) { | 2721 | if (open_flag & O_CREAT) { |
2656 | error = -EISDIR; | 2722 | error = -EISDIR; |
2657 | goto out; | 2723 | goto out; |
@@ -2661,7 +2727,7 @@ static int do_last(struct nameidata *nd, struct path *path, | |||
2661 | error = complete_walk(nd); | 2727 | error = complete_walk(nd); |
2662 | if (error) | 2728 | if (error) |
2663 | return error; | 2729 | return error; |
2664 | audit_inode(pathname, dir); | 2730 | audit_inode(name, dir, 0); |
2665 | goto finish_open; | 2731 | goto finish_open; |
2666 | } | 2732 | } |
2667 | 2733 | ||
@@ -2690,7 +2756,7 @@ static int do_last(struct nameidata *nd, struct path *path, | |||
2690 | if (error) | 2756 | if (error) |
2691 | return error; | 2757 | return error; |
2692 | 2758 | ||
2693 | audit_inode(pathname, dir); | 2759 | audit_inode(name, dir, 0); |
2694 | error = -EISDIR; | 2760 | error = -EISDIR; |
2695 | /* trailing slashes? */ | 2761 | /* trailing slashes? */ |
2696 | if (nd->last.name[nd->last.len]) | 2762 | if (nd->last.name[nd->last.len]) |
@@ -2720,7 +2786,7 @@ retry_lookup: | |||
2720 | !S_ISREG(file->f_path.dentry->d_inode->i_mode)) | 2786 | !S_ISREG(file->f_path.dentry->d_inode->i_mode)) |
2721 | will_truncate = false; | 2787 | will_truncate = false; |
2722 | 2788 | ||
2723 | audit_inode(pathname, file->f_path.dentry); | 2789 | audit_inode(name, file->f_path.dentry, 0); |
2724 | goto opened; | 2790 | goto opened; |
2725 | } | 2791 | } |
2726 | 2792 | ||
@@ -2737,7 +2803,7 @@ retry_lookup: | |||
2737 | * create/update audit record if it already exists. | 2803 | * create/update audit record if it already exists. |
2738 | */ | 2804 | */ |
2739 | if (path->dentry->d_inode) | 2805 | if (path->dentry->d_inode) |
2740 | audit_inode(pathname, path->dentry); | 2806 | audit_inode(name, path->dentry, 0); |
2741 | 2807 | ||
2742 | /* | 2808 | /* |
2743 | * If atomic_open() acquired write access it is dropped now due to | 2809 | * If atomic_open() acquired write access it is dropped now due to |
@@ -2802,7 +2868,7 @@ finish_lookup: | |||
2802 | error = -ENOTDIR; | 2868 | error = -ENOTDIR; |
2803 | if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) | 2869 | if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) |
2804 | goto out; | 2870 | goto out; |
2805 | audit_inode(pathname, nd->path.dentry); | 2871 | audit_inode(name, nd->path.dentry, 0); |
2806 | finish_open: | 2872 | finish_open: |
2807 | if (!S_ISREG(nd->inode->i_mode)) | 2873 | if (!S_ISREG(nd->inode->i_mode)) |
2808 | will_truncate = false; | 2874 | will_truncate = false; |
@@ -2870,7 +2936,7 @@ stale_open: | |||
2870 | goto retry_lookup; | 2936 | goto retry_lookup; |
2871 | } | 2937 | } |
2872 | 2938 | ||
2873 | static struct file *path_openat(int dfd, const char *pathname, | 2939 | static struct file *path_openat(int dfd, struct filename *pathname, |
2874 | struct nameidata *nd, const struct open_flags *op, int flags) | 2940 | struct nameidata *nd, const struct open_flags *op, int flags) |
2875 | { | 2941 | { |
2876 | struct file *base = NULL; | 2942 | struct file *base = NULL; |
@@ -2885,12 +2951,12 @@ static struct file *path_openat(int dfd, const char *pathname, | |||
2885 | 2951 | ||
2886 | file->f_flags = op->open_flag; | 2952 | file->f_flags = op->open_flag; |
2887 | 2953 | ||
2888 | error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); | 2954 | error = path_init(dfd, pathname->name, flags | LOOKUP_PARENT, nd, &base); |
2889 | if (unlikely(error)) | 2955 | if (unlikely(error)) |
2890 | goto out; | 2956 | goto out; |
2891 | 2957 | ||
2892 | current->total_link_count = 0; | 2958 | current->total_link_count = 0; |
2893 | error = link_path_walk(pathname, nd); | 2959 | error = link_path_walk(pathname->name, nd); |
2894 | if (unlikely(error)) | 2960 | if (unlikely(error)) |
2895 | goto out; | 2961 | goto out; |
2896 | 2962 | ||
@@ -2936,7 +3002,7 @@ out: | |||
2936 | return file; | 3002 | return file; |
2937 | } | 3003 | } |
2938 | 3004 | ||
2939 | struct file *do_filp_open(int dfd, const char *pathname, | 3005 | struct file *do_filp_open(int dfd, struct filename *pathname, |
2940 | const struct open_flags *op, int flags) | 3006 | const struct open_flags *op, int flags) |
2941 | { | 3007 | { |
2942 | struct nameidata nd; | 3008 | struct nameidata nd; |
@@ -2955,6 +3021,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, | |||
2955 | { | 3021 | { |
2956 | struct nameidata nd; | 3022 | struct nameidata nd; |
2957 | struct file *file; | 3023 | struct file *file; |
3024 | struct filename filename = { .name = name }; | ||
2958 | 3025 | ||
2959 | nd.root.mnt = mnt; | 3026 | nd.root.mnt = mnt; |
2960 | nd.root.dentry = dentry; | 3027 | nd.root.dentry = dentry; |
@@ -2964,11 +3031,11 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, | |||
2964 | if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) | 3031 | if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) |
2965 | return ERR_PTR(-ELOOP); | 3032 | return ERR_PTR(-ELOOP); |
2966 | 3033 | ||
2967 | file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU); | 3034 | file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU); |
2968 | if (unlikely(file == ERR_PTR(-ECHILD))) | 3035 | if (unlikely(file == ERR_PTR(-ECHILD))) |
2969 | file = path_openat(-1, name, &nd, op, flags); | 3036 | file = path_openat(-1, &filename, &nd, op, flags); |
2970 | if (unlikely(file == ERR_PTR(-ESTALE))) | 3037 | if (unlikely(file == ERR_PTR(-ESTALE))) |
2971 | file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL); | 3038 | file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_REVAL); |
2972 | return file; | 3039 | return file; |
2973 | } | 3040 | } |
2974 | 3041 | ||
@@ -3043,11 +3110,11 @@ EXPORT_SYMBOL(done_path_create); | |||
3043 | 3110 | ||
3044 | struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) | 3111 | struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) |
3045 | { | 3112 | { |
3046 | char *tmp = getname(pathname); | 3113 | struct filename *tmp = getname(pathname); |
3047 | struct dentry *res; | 3114 | struct dentry *res; |
3048 | if (IS_ERR(tmp)) | 3115 | if (IS_ERR(tmp)) |
3049 | return ERR_CAST(tmp); | 3116 | return ERR_CAST(tmp); |
3050 | res = kern_path_create(dfd, tmp, path, is_dir); | 3117 | res = kern_path_create(dfd, tmp->name, path, is_dir); |
3051 | putname(tmp); | 3118 | putname(tmp); |
3052 | return res; | 3119 | return res; |
3053 | } | 3120 | } |
@@ -3252,13 +3319,13 @@ out: | |||
3252 | static long do_rmdir(int dfd, const char __user *pathname) | 3319 | static long do_rmdir(int dfd, const char __user *pathname) |
3253 | { | 3320 | { |
3254 | int error = 0; | 3321 | int error = 0; |
3255 | char * name; | 3322 | struct filename *name; |
3256 | struct dentry *dentry; | 3323 | struct dentry *dentry; |
3257 | struct nameidata nd; | 3324 | struct nameidata nd; |
3258 | 3325 | ||
3259 | error = user_path_parent(dfd, pathname, &nd, &name); | 3326 | name = user_path_parent(dfd, pathname, &nd); |
3260 | if (error) | 3327 | if (IS_ERR(name)) |
3261 | return error; | 3328 | return PTR_ERR(name); |
3262 | 3329 | ||
3263 | switch(nd.last_type) { | 3330 | switch(nd.last_type) { |
3264 | case LAST_DOTDOT: | 3331 | case LAST_DOTDOT: |
@@ -3347,14 +3414,14 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) | |||
3347 | static long do_unlinkat(int dfd, const char __user *pathname) | 3414 | static long do_unlinkat(int dfd, const char __user *pathname) |
3348 | { | 3415 | { |
3349 | int error; | 3416 | int error; |
3350 | char *name; | 3417 | struct filename *name; |
3351 | struct dentry *dentry; | 3418 | struct dentry *dentry; |
3352 | struct nameidata nd; | 3419 | struct nameidata nd; |
3353 | struct inode *inode = NULL; | 3420 | struct inode *inode = NULL; |
3354 | 3421 | ||
3355 | error = user_path_parent(dfd, pathname, &nd, &name); | 3422 | name = user_path_parent(dfd, pathname, &nd); |
3356 | if (error) | 3423 | if (IS_ERR(name)) |
3357 | return error; | 3424 | return PTR_ERR(name); |
3358 | 3425 | ||
3359 | error = -EISDIR; | 3426 | error = -EISDIR; |
3360 | if (nd.last_type != LAST_NORM) | 3427 | if (nd.last_type != LAST_NORM) |
@@ -3438,7 +3505,7 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, | |||
3438 | int, newdfd, const char __user *, newname) | 3505 | int, newdfd, const char __user *, newname) |
3439 | { | 3506 | { |
3440 | int error; | 3507 | int error; |
3441 | char *from; | 3508 | struct filename *from; |
3442 | struct dentry *dentry; | 3509 | struct dentry *dentry; |
3443 | struct path path; | 3510 | struct path path; |
3444 | 3511 | ||
@@ -3451,9 +3518,9 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, | |||
3451 | if (IS_ERR(dentry)) | 3518 | if (IS_ERR(dentry)) |
3452 | goto out_putname; | 3519 | goto out_putname; |
3453 | 3520 | ||
3454 | error = security_path_symlink(&path, dentry, from); | 3521 | error = security_path_symlink(&path, dentry, from->name); |
3455 | if (!error) | 3522 | if (!error) |
3456 | error = vfs_symlink(path.dentry->d_inode, dentry, from); | 3523 | error = vfs_symlink(path.dentry->d_inode, dentry, from->name); |
3457 | done_path_create(&path, dentry); | 3524 | done_path_create(&path, dentry); |
3458 | out_putname: | 3525 | out_putname: |
3459 | putname(from); | 3526 | putname(from); |
@@ -3733,17 +3800,21 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, | |||
3733 | struct dentry *old_dentry, *new_dentry; | 3800 | struct dentry *old_dentry, *new_dentry; |
3734 | struct dentry *trap; | 3801 | struct dentry *trap; |
3735 | struct nameidata oldnd, newnd; | 3802 | struct nameidata oldnd, newnd; |
3736 | char *from; | 3803 | struct filename *from; |
3737 | char *to; | 3804 | struct filename *to; |
3738 | int error; | 3805 | int error; |
3739 | 3806 | ||
3740 | error = user_path_parent(olddfd, oldname, &oldnd, &from); | 3807 | from = user_path_parent(olddfd, oldname, &oldnd); |
3741 | if (error) | 3808 | if (IS_ERR(from)) { |
3809 | error = PTR_ERR(from); | ||
3742 | goto exit; | 3810 | goto exit; |
3811 | } | ||
3743 | 3812 | ||
3744 | error = user_path_parent(newdfd, newname, &newnd, &to); | 3813 | to = user_path_parent(newdfd, newname, &newnd); |
3745 | if (error) | 3814 | if (IS_ERR(to)) { |
3815 | error = PTR_ERR(to); | ||
3746 | goto exit1; | 3816 | goto exit1; |
3817 | } | ||
3747 | 3818 | ||
3748 | error = -EXDEV; | 3819 | error = -EXDEV; |
3749 | if (oldnd.path.mnt != newnd.path.mnt) | 3820 | if (oldnd.path.mnt != newnd.path.mnt) |
@@ -3967,7 +4038,6 @@ EXPORT_SYMBOL(follow_down_one); | |||
3967 | EXPORT_SYMBOL(follow_down); | 4038 | EXPORT_SYMBOL(follow_down); |
3968 | EXPORT_SYMBOL(follow_up); | 4039 | EXPORT_SYMBOL(follow_up); |
3969 | EXPORT_SYMBOL(get_write_access); /* nfsd */ | 4040 | EXPORT_SYMBOL(get_write_access); /* nfsd */ |
3970 | EXPORT_SYMBOL(getname); | ||
3971 | EXPORT_SYMBOL(lock_rename); | 4041 | EXPORT_SYMBOL(lock_rename); |
3972 | EXPORT_SYMBOL(lookup_one_len); | 4042 | EXPORT_SYMBOL(lookup_one_len); |
3973 | EXPORT_SYMBOL(page_follow_link_light); | 4043 | EXPORT_SYMBOL(page_follow_link_light); |
diff --git a/fs/namespace.c b/fs/namespace.c index 7bdf7907413f..24960626bb6b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1640,7 +1640,7 @@ static int do_change_type(struct path *path, int flag) | |||
1640 | /* | 1640 | /* |
1641 | * do loopback mount. | 1641 | * do loopback mount. |
1642 | */ | 1642 | */ |
1643 | static int do_loopback(struct path *path, char *old_name, | 1643 | static int do_loopback(struct path *path, const char *old_name, |
1644 | int recurse) | 1644 | int recurse) |
1645 | { | 1645 | { |
1646 | LIST_HEAD(umount_list); | 1646 | LIST_HEAD(umount_list); |
@@ -1764,7 +1764,7 @@ static inline int tree_contains_unbindable(struct mount *mnt) | |||
1764 | return 0; | 1764 | return 0; |
1765 | } | 1765 | } |
1766 | 1766 | ||
1767 | static int do_move_mount(struct path *path, char *old_name) | 1767 | static int do_move_mount(struct path *path, const char *old_name) |
1768 | { | 1768 | { |
1769 | struct path old_path, parent_path; | 1769 | struct path old_path, parent_path; |
1770 | struct mount *p; | 1770 | struct mount *p; |
@@ -1917,8 +1917,8 @@ unlock: | |||
1917 | * create a new mount for userspace and request it to be added into the | 1917 | * create a new mount for userspace and request it to be added into the |
1918 | * namespace's tree | 1918 | * namespace's tree |
1919 | */ | 1919 | */ |
1920 | static int do_new_mount(struct path *path, char *type, int flags, | 1920 | static int do_new_mount(struct path *path, const char *type, int flags, |
1921 | int mnt_flags, char *name, void *data) | 1921 | int mnt_flags, const char *name, void *data) |
1922 | { | 1922 | { |
1923 | struct vfsmount *mnt; | 1923 | struct vfsmount *mnt; |
1924 | int err; | 1924 | int err; |
@@ -2191,8 +2191,8 @@ int copy_mount_string(const void __user *data, char **where) | |||
2191 | * Therefore, if this magic number is present, it carries no information | 2191 | * Therefore, if this magic number is present, it carries no information |
2192 | * and must be discarded. | 2192 | * and must be discarded. |
2193 | */ | 2193 | */ |
2194 | long do_mount(char *dev_name, char *dir_name, char *type_page, | 2194 | long do_mount(const char *dev_name, const char *dir_name, |
2195 | unsigned long flags, void *data_page) | 2195 | const char *type_page, unsigned long flags, void *data_page) |
2196 | { | 2196 | { |
2197 | struct path path; | 2197 | struct path path; |
2198 | int retval = 0; | 2198 | int retval = 0; |
@@ -2408,7 +2408,7 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, | |||
2408 | { | 2408 | { |
2409 | int ret; | 2409 | int ret; |
2410 | char *kernel_type; | 2410 | char *kernel_type; |
2411 | char *kernel_dir; | 2411 | struct filename *kernel_dir; |
2412 | char *kernel_dev; | 2412 | char *kernel_dev; |
2413 | unsigned long data_page; | 2413 | unsigned long data_page; |
2414 | 2414 | ||
@@ -2430,7 +2430,7 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, | |||
2430 | if (ret < 0) | 2430 | if (ret < 0) |
2431 | goto out_data; | 2431 | goto out_data; |
2432 | 2432 | ||
2433 | ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags, | 2433 | ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags, |
2434 | (void *) data_page); | 2434 | (void *) data_page); |
2435 | 2435 | ||
2436 | free_page(data_page); | 2436 | free_page(data_page); |
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index db7ad719628a..13ca196385f5 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -95,8 +95,8 @@ config NFS_SWAP | |||
95 | This option enables swapon to work on files located on NFS mounts. | 95 | This option enables swapon to work on files located on NFS mounts. |
96 | 96 | ||
97 | config NFS_V4_1 | 97 | config NFS_V4_1 |
98 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" | 98 | bool "NFS client support for NFSv4.1" |
99 | depends on NFS_V4 && EXPERIMENTAL | 99 | depends on NFS_V4 |
100 | select SUNRPC_BACKCHANNEL | 100 | select SUNRPC_BACKCHANNEL |
101 | help | 101 | help |
102 | This option enables support for minor version 1 of the NFSv4 protocol | 102 | This option enables support for minor version 1 of the NFSv4 protocol |
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index dd392ed5f2e2..f1027b06a1a9 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/bio.h> /* struct bio */ | 37 | #include <linux/bio.h> /* struct bio */ |
38 | #include <linux/buffer_head.h> /* various write calls */ | 38 | #include <linux/buffer_head.h> /* various write calls */ |
39 | #include <linux/prefetch.h> | 39 | #include <linux/prefetch.h> |
40 | #include <linux/pagevec.h> | ||
40 | 41 | ||
41 | #include "../pnfs.h" | 42 | #include "../pnfs.h" |
42 | #include "../internal.h" | 43 | #include "../internal.h" |
@@ -162,25 +163,39 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, | |||
162 | return bio; | 163 | return bio; |
163 | } | 164 | } |
164 | 165 | ||
165 | static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, | 166 | static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, |
166 | sector_t isect, struct page *page, | 167 | sector_t isect, struct page *page, |
167 | struct pnfs_block_extent *be, | 168 | struct pnfs_block_extent *be, |
168 | void (*end_io)(struct bio *, int err), | 169 | void (*end_io)(struct bio *, int err), |
169 | struct parallel_io *par) | 170 | struct parallel_io *par, |
171 | unsigned int offset, int len) | ||
170 | { | 172 | { |
173 | isect = isect + (offset >> SECTOR_SHIFT); | ||
174 | dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, | ||
175 | npg, rw, (unsigned long long)isect, offset, len); | ||
171 | retry: | 176 | retry: |
172 | if (!bio) { | 177 | if (!bio) { |
173 | bio = bl_alloc_init_bio(npg, isect, be, end_io, par); | 178 | bio = bl_alloc_init_bio(npg, isect, be, end_io, par); |
174 | if (!bio) | 179 | if (!bio) |
175 | return ERR_PTR(-ENOMEM); | 180 | return ERR_PTR(-ENOMEM); |
176 | } | 181 | } |
177 | if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { | 182 | if (bio_add_page(bio, page, len, offset) < len) { |
178 | bio = bl_submit_bio(rw, bio); | 183 | bio = bl_submit_bio(rw, bio); |
179 | goto retry; | 184 | goto retry; |
180 | } | 185 | } |
181 | return bio; | 186 | return bio; |
182 | } | 187 | } |
183 | 188 | ||
189 | static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, | ||
190 | sector_t isect, struct page *page, | ||
191 | struct pnfs_block_extent *be, | ||
192 | void (*end_io)(struct bio *, int err), | ||
193 | struct parallel_io *par) | ||
194 | { | ||
195 | return do_add_page_to_bio(bio, npg, rw, isect, page, be, | ||
196 | end_io, par, 0, PAGE_CACHE_SIZE); | ||
197 | } | ||
198 | |||
184 | /* This is basically copied from mpage_end_io_read */ | 199 | /* This is basically copied from mpage_end_io_read */ |
185 | static void bl_end_io_read(struct bio *bio, int err) | 200 | static void bl_end_io_read(struct bio *bio, int err) |
186 | { | 201 | { |
@@ -228,14 +243,6 @@ bl_end_par_io_read(void *data, int unused) | |||
228 | schedule_work(&rdata->task.u.tk_work); | 243 | schedule_work(&rdata->task.u.tk_work); |
229 | } | 244 | } |
230 | 245 | ||
231 | static bool | ||
232 | bl_check_alignment(u64 offset, u32 len, unsigned long blkmask) | ||
233 | { | ||
234 | if ((offset & blkmask) || (len & blkmask)) | ||
235 | return false; | ||
236 | return true; | ||
237 | } | ||
238 | |||
239 | static enum pnfs_try_status | 246 | static enum pnfs_try_status |
240 | bl_read_pagelist(struct nfs_read_data *rdata) | 247 | bl_read_pagelist(struct nfs_read_data *rdata) |
241 | { | 248 | { |
@@ -246,15 +253,15 @@ bl_read_pagelist(struct nfs_read_data *rdata) | |||
246 | sector_t isect, extent_length = 0; | 253 | sector_t isect, extent_length = 0; |
247 | struct parallel_io *par; | 254 | struct parallel_io *par; |
248 | loff_t f_offset = rdata->args.offset; | 255 | loff_t f_offset = rdata->args.offset; |
256 | size_t bytes_left = rdata->args.count; | ||
257 | unsigned int pg_offset, pg_len; | ||
249 | struct page **pages = rdata->args.pages; | 258 | struct page **pages = rdata->args.pages; |
250 | int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; | 259 | int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; |
260 | const bool is_dio = (header->dreq != NULL); | ||
251 | 261 | ||
252 | dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, | 262 | dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, |
253 | rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); | 263 | rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); |
254 | 264 | ||
255 | if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK)) | ||
256 | goto use_mds; | ||
257 | |||
258 | par = alloc_parallel(rdata); | 265 | par = alloc_parallel(rdata); |
259 | if (!par) | 266 | if (!par) |
260 | goto use_mds; | 267 | goto use_mds; |
@@ -284,36 +291,53 @@ bl_read_pagelist(struct nfs_read_data *rdata) | |||
284 | extent_length = min(extent_length, cow_length); | 291 | extent_length = min(extent_length, cow_length); |
285 | } | 292 | } |
286 | } | 293 | } |
294 | |||
295 | if (is_dio) { | ||
296 | pg_offset = f_offset & ~PAGE_CACHE_MASK; | ||
297 | if (pg_offset + bytes_left > PAGE_CACHE_SIZE) | ||
298 | pg_len = PAGE_CACHE_SIZE - pg_offset; | ||
299 | else | ||
300 | pg_len = bytes_left; | ||
301 | |||
302 | f_offset += pg_len; | ||
303 | bytes_left -= pg_len; | ||
304 | isect += (pg_offset >> SECTOR_SHIFT); | ||
305 | } else { | ||
306 | pg_offset = 0; | ||
307 | pg_len = PAGE_CACHE_SIZE; | ||
308 | } | ||
309 | |||
287 | hole = is_hole(be, isect); | 310 | hole = is_hole(be, isect); |
288 | if (hole && !cow_read) { | 311 | if (hole && !cow_read) { |
289 | bio = bl_submit_bio(READ, bio); | 312 | bio = bl_submit_bio(READ, bio); |
290 | /* Fill hole w/ zeroes w/o accessing device */ | 313 | /* Fill hole w/ zeroes w/o accessing device */ |
291 | dprintk("%s Zeroing page for hole\n", __func__); | 314 | dprintk("%s Zeroing page for hole\n", __func__); |
292 | zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); | 315 | zero_user_segment(pages[i], pg_offset, pg_len); |
293 | print_page(pages[i]); | 316 | print_page(pages[i]); |
294 | SetPageUptodate(pages[i]); | 317 | SetPageUptodate(pages[i]); |
295 | } else { | 318 | } else { |
296 | struct pnfs_block_extent *be_read; | 319 | struct pnfs_block_extent *be_read; |
297 | 320 | ||
298 | be_read = (hole && cow_read) ? cow_read : be; | 321 | be_read = (hole && cow_read) ? cow_read : be; |
299 | bio = bl_add_page_to_bio(bio, rdata->pages.npages - i, | 322 | bio = do_add_page_to_bio(bio, rdata->pages.npages - i, |
300 | READ, | 323 | READ, |
301 | isect, pages[i], be_read, | 324 | isect, pages[i], be_read, |
302 | bl_end_io_read, par); | 325 | bl_end_io_read, par, |
326 | pg_offset, pg_len); | ||
303 | if (IS_ERR(bio)) { | 327 | if (IS_ERR(bio)) { |
304 | header->pnfs_error = PTR_ERR(bio); | 328 | header->pnfs_error = PTR_ERR(bio); |
305 | bio = NULL; | 329 | bio = NULL; |
306 | goto out; | 330 | goto out; |
307 | } | 331 | } |
308 | } | 332 | } |
309 | isect += PAGE_CACHE_SECTORS; | 333 | isect += (pg_len >> SECTOR_SHIFT); |
310 | extent_length -= PAGE_CACHE_SECTORS; | 334 | extent_length -= PAGE_CACHE_SECTORS; |
311 | } | 335 | } |
312 | if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { | 336 | if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { |
313 | rdata->res.eof = 1; | 337 | rdata->res.eof = 1; |
314 | rdata->res.count = header->inode->i_size - f_offset; | 338 | rdata->res.count = header->inode->i_size - rdata->args.offset; |
315 | } else { | 339 | } else { |
316 | rdata->res.count = (isect << SECTOR_SHIFT) - f_offset; | 340 | rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset; |
317 | } | 341 | } |
318 | out: | 342 | out: |
319 | bl_put_extent(be); | 343 | bl_put_extent(be); |
@@ -461,6 +485,106 @@ map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be) | |||
461 | return; | 485 | return; |
462 | } | 486 | } |
463 | 487 | ||
488 | static void | ||
489 | bl_read_single_end_io(struct bio *bio, int error) | ||
490 | { | ||
491 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
492 | struct page *page = bvec->bv_page; | ||
493 | |||
494 | /* Only one page in bvec */ | ||
495 | unlock_page(page); | ||
496 | } | ||
497 | |||
498 | static int | ||
499 | bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be, | ||
500 | unsigned int offset, unsigned int len) | ||
501 | { | ||
502 | struct bio *bio; | ||
503 | struct page *shadow_page; | ||
504 | sector_t isect; | ||
505 | char *kaddr, *kshadow_addr; | ||
506 | int ret = 0; | ||
507 | |||
508 | dprintk("%s: offset %u len %u\n", __func__, offset, len); | ||
509 | |||
510 | shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
511 | if (shadow_page == NULL) | ||
512 | return -ENOMEM; | ||
513 | |||
514 | bio = bio_alloc(GFP_NOIO, 1); | ||
515 | if (bio == NULL) | ||
516 | return -ENOMEM; | ||
517 | |||
518 | isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) + | ||
519 | (offset / SECTOR_SIZE); | ||
520 | |||
521 | bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; | ||
522 | bio->bi_bdev = be->be_mdev; | ||
523 | bio->bi_end_io = bl_read_single_end_io; | ||
524 | |||
525 | lock_page(shadow_page); | ||
526 | if (bio_add_page(bio, shadow_page, | ||
527 | SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) { | ||
528 | unlock_page(shadow_page); | ||
529 | bio_put(bio); | ||
530 | return -EIO; | ||
531 | } | ||
532 | |||
533 | submit_bio(READ, bio); | ||
534 | wait_on_page_locked(shadow_page); | ||
535 | if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) { | ||
536 | ret = -EIO; | ||
537 | } else { | ||
538 | kaddr = kmap_atomic(page); | ||
539 | kshadow_addr = kmap_atomic(shadow_page); | ||
540 | memcpy(kaddr + offset, kshadow_addr + offset, len); | ||
541 | kunmap_atomic(kshadow_addr); | ||
542 | kunmap_atomic(kaddr); | ||
543 | } | ||
544 | __free_page(shadow_page); | ||
545 | bio_put(bio); | ||
546 | |||
547 | return ret; | ||
548 | } | ||
549 | |||
550 | static int | ||
551 | bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be, | ||
552 | unsigned int dirty_offset, unsigned int dirty_len, | ||
553 | bool full_page) | ||
554 | { | ||
555 | int ret = 0; | ||
556 | unsigned int start, end; | ||
557 | |||
558 | if (full_page) { | ||
559 | start = 0; | ||
560 | end = PAGE_CACHE_SIZE; | ||
561 | } else { | ||
562 | start = round_down(dirty_offset, SECTOR_SIZE); | ||
563 | end = round_up(dirty_offset + dirty_len, SECTOR_SIZE); | ||
564 | } | ||
565 | |||
566 | dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len); | ||
567 | if (!be) { | ||
568 | zero_user_segments(page, start, dirty_offset, | ||
569 | dirty_offset + dirty_len, end); | ||
570 | if (start == 0 && end == PAGE_CACHE_SIZE && | ||
571 | trylock_page(page)) { | ||
572 | SetPageUptodate(page); | ||
573 | unlock_page(page); | ||
574 | } | ||
575 | return ret; | ||
576 | } | ||
577 | |||
578 | if (start != dirty_offset) | ||
579 | ret = bl_do_readpage_sync(page, be, start, dirty_offset - start); | ||
580 | |||
581 | if (!ret && (dirty_offset + dirty_len < end)) | ||
582 | ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len, | ||
583 | end - dirty_offset - dirty_len); | ||
584 | |||
585 | return ret; | ||
586 | } | ||
587 | |||
464 | /* Given an unmapped page, zero it or read in page for COW, page is locked | 588 | /* Given an unmapped page, zero it or read in page for COW, page is locked |
465 | * by caller. | 589 | * by caller. |
466 | */ | 590 | */ |
@@ -494,7 +618,6 @@ init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read) | |||
494 | SetPageUptodate(page); | 618 | SetPageUptodate(page); |
495 | 619 | ||
496 | cleanup: | 620 | cleanup: |
497 | bl_put_extent(cow_read); | ||
498 | if (bh) | 621 | if (bh) |
499 | free_buffer_head(bh); | 622 | free_buffer_head(bh); |
500 | if (ret) { | 623 | if (ret) { |
@@ -566,6 +689,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) | |||
566 | struct parallel_io *par = NULL; | 689 | struct parallel_io *par = NULL; |
567 | loff_t offset = wdata->args.offset; | 690 | loff_t offset = wdata->args.offset; |
568 | size_t count = wdata->args.count; | 691 | size_t count = wdata->args.count; |
692 | unsigned int pg_offset, pg_len, saved_len; | ||
569 | struct page **pages = wdata->args.pages; | 693 | struct page **pages = wdata->args.pages; |
570 | struct page *page; | 694 | struct page *page; |
571 | pgoff_t index; | 695 | pgoff_t index; |
@@ -574,10 +698,13 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) | |||
574 | NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; | 698 | NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; |
575 | 699 | ||
576 | dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); | 700 | dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); |
577 | /* Check for alignment first */ | ||
578 | if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK)) | ||
579 | goto out_mds; | ||
580 | 701 | ||
702 | if (header->dreq != NULL && | ||
703 | (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) || | ||
704 | !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) { | ||
705 | dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); | ||
706 | goto out_mds; | ||
707 | } | ||
581 | /* At this point, wdata->pages is a (sequential) list of nfs_pages. | 708 | /* At this point, wdata->pages is a (sequential) list of nfs_pages. |
582 | * We want to write each, and if there is an error set pnfs_error | 709 | * We want to write each, and if there is an error set pnfs_error |
583 | * to have it redone using nfs. | 710 | * to have it redone using nfs. |
@@ -674,10 +801,11 @@ next_page: | |||
674 | if (!extent_length) { | 801 | if (!extent_length) { |
675 | /* We've used up the previous extent */ | 802 | /* We've used up the previous extent */ |
676 | bl_put_extent(be); | 803 | bl_put_extent(be); |
804 | bl_put_extent(cow_read); | ||
677 | bio = bl_submit_bio(WRITE, bio); | 805 | bio = bl_submit_bio(WRITE, bio); |
678 | /* Get the next one */ | 806 | /* Get the next one */ |
679 | be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), | 807 | be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), |
680 | isect, NULL); | 808 | isect, &cow_read); |
681 | if (!be || !is_writable(be, isect)) { | 809 | if (!be || !is_writable(be, isect)) { |
682 | header->pnfs_error = -EINVAL; | 810 | header->pnfs_error = -EINVAL; |
683 | goto out; | 811 | goto out; |
@@ -694,7 +822,26 @@ next_page: | |||
694 | extent_length = be->be_length - | 822 | extent_length = be->be_length - |
695 | (isect - be->be_f_offset); | 823 | (isect - be->be_f_offset); |
696 | } | 824 | } |
697 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | 825 | |
826 | dprintk("%s offset %lld count %Zu\n", __func__, offset, count); | ||
827 | pg_offset = offset & ~PAGE_CACHE_MASK; | ||
828 | if (pg_offset + count > PAGE_CACHE_SIZE) | ||
829 | pg_len = PAGE_CACHE_SIZE - pg_offset; | ||
830 | else | ||
831 | pg_len = count; | ||
832 | |||
833 | saved_len = pg_len; | ||
834 | if (be->be_state == PNFS_BLOCK_INVALID_DATA && | ||
835 | !bl_is_sector_init(be->be_inval, isect)) { | ||
836 | ret = bl_read_partial_page_sync(pages[i], cow_read, | ||
837 | pg_offset, pg_len, true); | ||
838 | if (ret) { | ||
839 | dprintk("%s bl_read_partial_page_sync fail %d\n", | ||
840 | __func__, ret); | ||
841 | header->pnfs_error = ret; | ||
842 | goto out; | ||
843 | } | ||
844 | |||
698 | ret = bl_mark_sectors_init(be->be_inval, isect, | 845 | ret = bl_mark_sectors_init(be->be_inval, isect, |
699 | PAGE_CACHE_SECTORS); | 846 | PAGE_CACHE_SECTORS); |
700 | if (unlikely(ret)) { | 847 | if (unlikely(ret)) { |
@@ -703,15 +850,35 @@ next_page: | |||
703 | header->pnfs_error = ret; | 850 | header->pnfs_error = ret; |
704 | goto out; | 851 | goto out; |
705 | } | 852 | } |
853 | |||
854 | /* Expand to full page write */ | ||
855 | pg_offset = 0; | ||
856 | pg_len = PAGE_CACHE_SIZE; | ||
857 | } else if ((pg_offset & (SECTOR_SIZE - 1)) || | ||
858 | (pg_len & (SECTOR_SIZE - 1))){ | ||
859 | /* ahh, nasty case. We have to do sync full sector | ||
860 | * read-modify-write cycles. | ||
861 | */ | ||
862 | unsigned int saved_offset = pg_offset; | ||
863 | ret = bl_read_partial_page_sync(pages[i], be, pg_offset, | ||
864 | pg_len, false); | ||
865 | pg_offset = round_down(pg_offset, SECTOR_SIZE); | ||
866 | pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE) | ||
867 | - pg_offset; | ||
706 | } | 868 | } |
707 | bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, | 869 | |
870 | |||
871 | bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, | ||
708 | isect, pages[i], be, | 872 | isect, pages[i], be, |
709 | bl_end_io_write, par); | 873 | bl_end_io_write, par, |
874 | pg_offset, pg_len); | ||
710 | if (IS_ERR(bio)) { | 875 | if (IS_ERR(bio)) { |
711 | header->pnfs_error = PTR_ERR(bio); | 876 | header->pnfs_error = PTR_ERR(bio); |
712 | bio = NULL; | 877 | bio = NULL; |
713 | goto out; | 878 | goto out; |
714 | } | 879 | } |
880 | offset += saved_len; | ||
881 | count -= saved_len; | ||
715 | isect += PAGE_CACHE_SECTORS; | 882 | isect += PAGE_CACHE_SECTORS; |
716 | last_isect = isect; | 883 | last_isect = isect; |
717 | extent_length -= PAGE_CACHE_SECTORS; | 884 | extent_length -= PAGE_CACHE_SECTORS; |
@@ -729,17 +896,16 @@ next_page: | |||
729 | } | 896 | } |
730 | 897 | ||
731 | write_done: | 898 | write_done: |
732 | wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset); | 899 | wdata->res.count = wdata->args.count; |
733 | if (count < wdata->res.count) { | ||
734 | wdata->res.count = count; | ||
735 | } | ||
736 | out: | 900 | out: |
737 | bl_put_extent(be); | 901 | bl_put_extent(be); |
902 | bl_put_extent(cow_read); | ||
738 | bl_submit_bio(WRITE, bio); | 903 | bl_submit_bio(WRITE, bio); |
739 | put_parallel(par); | 904 | put_parallel(par); |
740 | return PNFS_ATTEMPTED; | 905 | return PNFS_ATTEMPTED; |
741 | out_mds: | 906 | out_mds: |
742 | bl_put_extent(be); | 907 | bl_put_extent(be); |
908 | bl_put_extent(cow_read); | ||
743 | kfree(par); | 909 | kfree(par); |
744 | return PNFS_NOT_ATTEMPTED; | 910 | return PNFS_NOT_ATTEMPTED; |
745 | } | 911 | } |
@@ -874,7 +1040,7 @@ static void free_blk_mountid(struct block_mount_id *mid) | |||
874 | } | 1040 | } |
875 | } | 1041 | } |
876 | 1042 | ||
877 | /* This is mostly copied from the filelayout's get_device_info function. | 1043 | /* This is mostly copied from the filelayout_get_device_info function. |
878 | * It seems much of this should be at the generic pnfs level. | 1044 | * It seems much of this should be at the generic pnfs level. |
879 | */ | 1045 | */ |
880 | static struct pnfs_block_dev * | 1046 | static struct pnfs_block_dev * |
@@ -1011,33 +1177,95 @@ bl_clear_layoutdriver(struct nfs_server *server) | |||
1011 | return 0; | 1177 | return 0; |
1012 | } | 1178 | } |
1013 | 1179 | ||
1180 | static bool | ||
1181 | is_aligned_req(struct nfs_page *req, unsigned int alignment) | ||
1182 | { | ||
1183 | return IS_ALIGNED(req->wb_offset, alignment) && | ||
1184 | IS_ALIGNED(req->wb_bytes, alignment); | ||
1185 | } | ||
1186 | |||
1014 | static void | 1187 | static void |
1015 | bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 1188 | bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
1016 | { | 1189 | { |
1017 | if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK)) | 1190 | if (pgio->pg_dreq != NULL && |
1191 | !is_aligned_req(req, SECTOR_SIZE)) | ||
1018 | nfs_pageio_reset_read_mds(pgio); | 1192 | nfs_pageio_reset_read_mds(pgio); |
1019 | else | 1193 | else |
1020 | pnfs_generic_pg_init_read(pgio, req); | 1194 | pnfs_generic_pg_init_read(pgio, req); |
1021 | } | 1195 | } |
1022 | 1196 | ||
1197 | static bool | ||
1198 | bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | ||
1199 | struct nfs_page *req) | ||
1200 | { | ||
1201 | if (pgio->pg_dreq != NULL && | ||
1202 | !is_aligned_req(req, SECTOR_SIZE)) | ||
1203 | return false; | ||
1204 | |||
1205 | return pnfs_generic_pg_test(pgio, prev, req); | ||
1206 | } | ||
1207 | |||
1208 | /* | ||
1209 | * Return the number of contiguous bytes for a given inode | ||
1210 | * starting at page frame idx. | ||
1211 | */ | ||
1212 | static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) | ||
1213 | { | ||
1214 | struct address_space *mapping = inode->i_mapping; | ||
1215 | pgoff_t end; | ||
1216 | |||
1217 | /* Optimize common case that writes from 0 to end of file */ | ||
1218 | end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); | ||
1219 | if (end != NFS_I(inode)->npages) { | ||
1220 | rcu_read_lock(); | ||
1221 | end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); | ||
1222 | rcu_read_unlock(); | ||
1223 | } | ||
1224 | |||
1225 | if (!end) | ||
1226 | return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT); | ||
1227 | else | ||
1228 | return (end - idx) << PAGE_CACHE_SHIFT; | ||
1229 | } | ||
1230 | |||
1023 | static void | 1231 | static void |
1024 | bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 1232 | bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
1025 | { | 1233 | { |
1026 | if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK)) | 1234 | if (pgio->pg_dreq != NULL && |
1235 | !is_aligned_req(req, PAGE_CACHE_SIZE)) { | ||
1027 | nfs_pageio_reset_write_mds(pgio); | 1236 | nfs_pageio_reset_write_mds(pgio); |
1028 | else | 1237 | } else { |
1029 | pnfs_generic_pg_init_write(pgio, req); | 1238 | u64 wb_size; |
1239 | if (pgio->pg_dreq == NULL) | ||
1240 | wb_size = pnfs_num_cont_bytes(pgio->pg_inode, | ||
1241 | req->wb_index); | ||
1242 | else | ||
1243 | wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); | ||
1244 | |||
1245 | pnfs_generic_pg_init_write(pgio, req, wb_size); | ||
1246 | } | ||
1247 | } | ||
1248 | |||
1249 | static bool | ||
1250 | bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | ||
1251 | struct nfs_page *req) | ||
1252 | { | ||
1253 | if (pgio->pg_dreq != NULL && | ||
1254 | !is_aligned_req(req, PAGE_CACHE_SIZE)) | ||
1255 | return false; | ||
1256 | |||
1257 | return pnfs_generic_pg_test(pgio, prev, req); | ||
1030 | } | 1258 | } |
1031 | 1259 | ||
1032 | static const struct nfs_pageio_ops bl_pg_read_ops = { | 1260 | static const struct nfs_pageio_ops bl_pg_read_ops = { |
1033 | .pg_init = bl_pg_init_read, | 1261 | .pg_init = bl_pg_init_read, |
1034 | .pg_test = pnfs_generic_pg_test, | 1262 | .pg_test = bl_pg_test_read, |
1035 | .pg_doio = pnfs_generic_pg_readpages, | 1263 | .pg_doio = pnfs_generic_pg_readpages, |
1036 | }; | 1264 | }; |
1037 | 1265 | ||
1038 | static const struct nfs_pageio_ops bl_pg_write_ops = { | 1266 | static const struct nfs_pageio_ops bl_pg_write_ops = { |
1039 | .pg_init = bl_pg_init_write, | 1267 | .pg_init = bl_pg_init_write, |
1040 | .pg_test = pnfs_generic_pg_test, | 1268 | .pg_test = bl_pg_test_write, |
1041 | .pg_doio = pnfs_generic_pg_writepages, | 1269 | .pg_doio = pnfs_generic_pg_writepages, |
1042 | }; | 1270 | }; |
1043 | 1271 | ||
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 03350690118e..f4891bde8851 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h | |||
@@ -41,6 +41,7 @@ | |||
41 | 41 | ||
42 | #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) | 42 | #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) |
43 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) | 43 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) |
44 | #define SECTOR_SIZE (1 << SECTOR_SHIFT) | ||
44 | 45 | ||
45 | struct block_mount_id { | 46 | struct block_mount_id { |
46 | spinlock_t bm_lock; /* protects list */ | 47 | spinlock_t bm_lock; /* protects list */ |
@@ -172,7 +173,6 @@ struct bl_msg_hdr { | |||
172 | /* blocklayoutdev.c */ | 173 | /* blocklayoutdev.c */ |
173 | ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); | 174 | ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); |
174 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *); | 175 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *); |
175 | struct block_device *nfs4_blkdev_get(dev_t dev); | ||
176 | int nfs4_blkdev_put(struct block_device *bdev); | 176 | int nfs4_blkdev_put(struct block_device *bdev); |
177 | struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, | 177 | struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, |
178 | struct pnfs_device *dev); | 178 | struct pnfs_device *dev); |
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index c96554245ccf..a86c5bdad9e3 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c | |||
@@ -53,22 +53,6 @@ static int decode_sector_number(__be32 **rp, sector_t *sp) | |||
53 | return 0; | 53 | return 0; |
54 | } | 54 | } |
55 | 55 | ||
56 | /* Open a block_device by device number. */ | ||
57 | struct block_device *nfs4_blkdev_get(dev_t dev) | ||
58 | { | ||
59 | struct block_device *bd; | ||
60 | |||
61 | dprintk("%s enter\n", __func__); | ||
62 | bd = blkdev_get_by_dev(dev, FMODE_READ, NULL); | ||
63 | if (IS_ERR(bd)) | ||
64 | goto fail; | ||
65 | return bd; | ||
66 | fail: | ||
67 | dprintk("%s failed to open device : %ld\n", | ||
68 | __func__, PTR_ERR(bd)); | ||
69 | return NULL; | ||
70 | } | ||
71 | |||
72 | /* | 56 | /* |
73 | * Release the block device | 57 | * Release the block device |
74 | */ | 58 | */ |
@@ -172,11 +156,12 @@ nfs4_blk_decode_device(struct nfs_server *server, | |||
172 | goto out; | 156 | goto out; |
173 | } | 157 | } |
174 | 158 | ||
175 | bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor)); | 159 | bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor), |
160 | FMODE_READ, NULL); | ||
176 | if (IS_ERR(bd)) { | 161 | if (IS_ERR(bd)) { |
177 | rc = PTR_ERR(bd); | 162 | dprintk("%s failed to open device : %ld\n", __func__, |
178 | dprintk("%s failed to open device : %d\n", __func__, rc); | 163 | PTR_ERR(bd)); |
179 | rv = ERR_PTR(rc); | 164 | rv = ERR_CAST(bd); |
180 | goto out; | 165 | goto out; |
181 | } | 166 | } |
182 | 167 | ||
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 1f9a6032796b..9c3e117c3ed1 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c | |||
@@ -683,8 +683,7 @@ encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | |||
683 | p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT); | 683 | p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT); |
684 | p = xdr_encode_hyper(p, 0LL); | 684 | p = xdr_encode_hyper(p, 0LL); |
685 | *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); | 685 | *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); |
686 | list_del(&lce->bse_node); | 686 | list_move_tail(&lce->bse_node, &bl->bl_committing); |
687 | list_add_tail(&lce->bse_node, &bl->bl_committing); | ||
688 | bl->bl_count--; | 687 | bl->bl_count--; |
689 | count++; | 688 | count++; |
690 | } | 689 | } |
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 4c8459e5bdee..9a521fb39869 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/sunrpc/svc.h> | 12 | #include <linux/sunrpc/svc.h> |
13 | #include <linux/sunrpc/svcsock.h> | 13 | #include <linux/sunrpc/svcsock.h> |
14 | #include <linux/nfs_fs.h> | 14 | #include <linux/nfs_fs.h> |
15 | #include <linux/errno.h> | ||
15 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
16 | #include <linux/freezer.h> | 17 | #include <linux/freezer.h> |
17 | #include <linux/kthread.h> | 18 | #include <linux/kthread.h> |
@@ -23,6 +24,7 @@ | |||
23 | #include "nfs4_fs.h" | 24 | #include "nfs4_fs.h" |
24 | #include "callback.h" | 25 | #include "callback.h" |
25 | #include "internal.h" | 26 | #include "internal.h" |
27 | #include "netns.h" | ||
26 | 28 | ||
27 | #define NFSDBG_FACILITY NFSDBG_CALLBACK | 29 | #define NFSDBG_FACILITY NFSDBG_CALLBACK |
28 | 30 | ||
@@ -37,7 +39,32 @@ static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1]; | |||
37 | static DEFINE_MUTEX(nfs_callback_mutex); | 39 | static DEFINE_MUTEX(nfs_callback_mutex); |
38 | static struct svc_program nfs4_callback_program; | 40 | static struct svc_program nfs4_callback_program; |
39 | 41 | ||
40 | unsigned short nfs_callback_tcpport6; | 42 | static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) |
43 | { | ||
44 | int ret; | ||
45 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
46 | |||
47 | ret = svc_create_xprt(serv, "tcp", net, PF_INET, | ||
48 | nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); | ||
49 | if (ret <= 0) | ||
50 | goto out_err; | ||
51 | nn->nfs_callback_tcpport = ret; | ||
52 | dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", | ||
53 | nn->nfs_callback_tcpport, PF_INET, net); | ||
54 | |||
55 | ret = svc_create_xprt(serv, "tcp", net, PF_INET6, | ||
56 | nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); | ||
57 | if (ret > 0) { | ||
58 | nn->nfs_callback_tcpport6 = ret; | ||
59 | dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", | ||
60 | nn->nfs_callback_tcpport6, PF_INET6, net); | ||
61 | } else if (ret != -EAFNOSUPPORT) | ||
62 | goto out_err; | ||
63 | return 0; | ||
64 | |||
65 | out_err: | ||
66 | return (ret) ? ret : -ENOMEM; | ||
67 | } | ||
41 | 68 | ||
42 | /* | 69 | /* |
43 | * This is the NFSv4 callback kernel thread. | 70 | * This is the NFSv4 callback kernel thread. |
@@ -45,7 +72,7 @@ unsigned short nfs_callback_tcpport6; | |||
45 | static int | 72 | static int |
46 | nfs4_callback_svc(void *vrqstp) | 73 | nfs4_callback_svc(void *vrqstp) |
47 | { | 74 | { |
48 | int err, preverr = 0; | 75 | int err; |
49 | struct svc_rqst *rqstp = vrqstp; | 76 | struct svc_rqst *rqstp = vrqstp; |
50 | 77 | ||
51 | set_freezable(); | 78 | set_freezable(); |
@@ -55,20 +82,8 @@ nfs4_callback_svc(void *vrqstp) | |||
55 | * Listen for a request on the socket | 82 | * Listen for a request on the socket |
56 | */ | 83 | */ |
57 | err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT); | 84 | err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT); |
58 | if (err == -EAGAIN || err == -EINTR) { | 85 | if (err == -EAGAIN || err == -EINTR) |
59 | preverr = err; | ||
60 | continue; | ||
61 | } | ||
62 | if (err < 0) { | ||
63 | if (err != preverr) { | ||
64 | printk(KERN_WARNING "NFS: %s: unexpected error " | ||
65 | "from svc_recv (%d)\n", __func__, err); | ||
66 | preverr = err; | ||
67 | } | ||
68 | schedule_timeout_uninterruptible(HZ); | ||
69 | continue; | 86 | continue; |
70 | } | ||
71 | preverr = err; | ||
72 | svc_process(rqstp); | 87 | svc_process(rqstp); |
73 | } | 88 | } |
74 | return 0; | 89 | return 0; |
@@ -78,38 +93,23 @@ nfs4_callback_svc(void *vrqstp) | |||
78 | * Prepare to bring up the NFSv4 callback service | 93 | * Prepare to bring up the NFSv4 callback service |
79 | */ | 94 | */ |
80 | static struct svc_rqst * | 95 | static struct svc_rqst * |
81 | nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) | 96 | nfs4_callback_up(struct svc_serv *serv) |
82 | { | 97 | { |
83 | int ret; | ||
84 | |||
85 | ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, | ||
86 | nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); | ||
87 | if (ret <= 0) | ||
88 | goto out_err; | ||
89 | nfs_callback_tcpport = ret; | ||
90 | dprintk("NFS: Callback listener port = %u (af %u)\n", | ||
91 | nfs_callback_tcpport, PF_INET); | ||
92 | |||
93 | ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, | ||
94 | nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); | ||
95 | if (ret > 0) { | ||
96 | nfs_callback_tcpport6 = ret; | ||
97 | dprintk("NFS: Callback listener port = %u (af %u)\n", | ||
98 | nfs_callback_tcpport6, PF_INET6); | ||
99 | } else if (ret == -EAFNOSUPPORT) | ||
100 | ret = 0; | ||
101 | else | ||
102 | goto out_err; | ||
103 | |||
104 | return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); | 98 | return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); |
105 | |||
106 | out_err: | ||
107 | if (ret == 0) | ||
108 | ret = -ENOMEM; | ||
109 | return ERR_PTR(ret); | ||
110 | } | 99 | } |
111 | 100 | ||
112 | #if defined(CONFIG_NFS_V4_1) | 101 | #if defined(CONFIG_NFS_V4_1) |
102 | static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) | ||
103 | { | ||
104 | /* | ||
105 | * Create an svc_sock for the back channel service that shares the | ||
106 | * fore channel connection. | ||
107 | * Returns the input port (0) and sets the svc_serv bc_xprt on success | ||
108 | */ | ||
109 | return svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0, | ||
110 | SVC_SOCK_ANONYMOUS); | ||
111 | } | ||
112 | |||
113 | /* | 113 | /* |
114 | * The callback service for NFSv4.1 callbacks | 114 | * The callback service for NFSv4.1 callbacks |
115 | */ | 115 | */ |
@@ -149,28 +149,9 @@ nfs41_callback_svc(void *vrqstp) | |||
149 | * Bring up the NFSv4.1 callback service | 149 | * Bring up the NFSv4.1 callback service |
150 | */ | 150 | */ |
151 | static struct svc_rqst * | 151 | static struct svc_rqst * |
152 | nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) | 152 | nfs41_callback_up(struct svc_serv *serv) |
153 | { | 153 | { |
154 | struct svc_rqst *rqstp; | 154 | struct svc_rqst *rqstp; |
155 | int ret; | ||
156 | |||
157 | /* | ||
158 | * Create an svc_sock for the back channel service that shares the | ||
159 | * fore channel connection. | ||
160 | * Returns the input port (0) and sets the svc_serv bc_xprt on success | ||
161 | */ | ||
162 | ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0, | ||
163 | SVC_SOCK_ANONYMOUS); | ||
164 | if (ret < 0) { | ||
165 | rqstp = ERR_PTR(ret); | ||
166 | goto out; | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * Save the svc_serv in the transport so that it can | ||
171 | * be referenced when the session backchannel is initialized | ||
172 | */ | ||
173 | xprt->bc_serv = serv; | ||
174 | 155 | ||
175 | INIT_LIST_HEAD(&serv->sv_cb_list); | 156 | INIT_LIST_HEAD(&serv->sv_cb_list); |
176 | spin_lock_init(&serv->sv_cb_lock); | 157 | spin_lock_init(&serv->sv_cb_lock); |
@@ -180,90 +161,74 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) | |||
180 | svc_xprt_put(serv->sv_bc_xprt); | 161 | svc_xprt_put(serv->sv_bc_xprt); |
181 | serv->sv_bc_xprt = NULL; | 162 | serv->sv_bc_xprt = NULL; |
182 | } | 163 | } |
183 | out: | ||
184 | dprintk("--> %s return %ld\n", __func__, | 164 | dprintk("--> %s return %ld\n", __func__, |
185 | IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0); | 165 | IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0); |
186 | return rqstp; | 166 | return rqstp; |
187 | } | 167 | } |
188 | 168 | ||
189 | static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, | 169 | static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, |
190 | struct svc_serv *serv, struct rpc_xprt *xprt, | ||
191 | struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) | 170 | struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) |
192 | { | 171 | { |
193 | if (minorversion) { | 172 | *rqstpp = nfs41_callback_up(serv); |
194 | *rqstpp = nfs41_callback_up(serv, xprt); | 173 | *callback_svc = nfs41_callback_svc; |
195 | *callback_svc = nfs41_callback_svc; | ||
196 | } | ||
197 | return minorversion; | ||
198 | } | 174 | } |
199 | 175 | ||
200 | static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, | 176 | static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, |
201 | struct nfs_callback_data *cb_info) | 177 | struct svc_serv *serv) |
202 | { | 178 | { |
203 | if (minorversion) | 179 | if (minorversion) |
204 | xprt->bc_serv = cb_info->serv; | 180 | /* |
181 | * Save the svc_serv in the transport so that it can | ||
182 | * be referenced when the session backchannel is initialized | ||
183 | */ | ||
184 | xprt->bc_serv = serv; | ||
205 | } | 185 | } |
206 | #else | 186 | #else |
207 | static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, | 187 | static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) |
208 | struct svc_serv *serv, struct rpc_xprt *xprt, | ||
209 | struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) | ||
210 | { | 188 | { |
211 | return 0; | 189 | return 0; |
212 | } | 190 | } |
213 | 191 | ||
192 | static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, | ||
193 | struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) | ||
194 | { | ||
195 | *rqstpp = ERR_PTR(-ENOTSUPP); | ||
196 | *callback_svc = ERR_PTR(-ENOTSUPP); | ||
197 | } | ||
198 | |||
214 | static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, | 199 | static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, |
215 | struct nfs_callback_data *cb_info) | 200 | struct svc_serv *serv) |
216 | { | 201 | { |
217 | } | 202 | } |
218 | #endif /* CONFIG_NFS_V4_1 */ | 203 | #endif /* CONFIG_NFS_V4_1 */ |
219 | 204 | ||
220 | /* | 205 | static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, |
221 | * Bring up the callback thread if it is not already up. | 206 | struct svc_serv *serv) |
222 | */ | ||
223 | int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) | ||
224 | { | 207 | { |
225 | struct svc_serv *serv = NULL; | ||
226 | struct svc_rqst *rqstp; | 208 | struct svc_rqst *rqstp; |
227 | int (*callback_svc)(void *vrqstp); | 209 | int (*callback_svc)(void *vrqstp); |
228 | struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; | 210 | struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; |
229 | char svc_name[12]; | 211 | char svc_name[12]; |
230 | int ret = 0; | 212 | int ret; |
231 | int minorversion_setup; | ||
232 | struct net *net = &init_net; | ||
233 | 213 | ||
234 | mutex_lock(&nfs_callback_mutex); | 214 | nfs_callback_bc_serv(minorversion, xprt, serv); |
235 | if (cb_info->users++ || cb_info->task != NULL) { | ||
236 | nfs_callback_bc_serv(minorversion, xprt, cb_info); | ||
237 | goto out; | ||
238 | } | ||
239 | serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); | ||
240 | if (!serv) { | ||
241 | ret = -ENOMEM; | ||
242 | goto out_err; | ||
243 | } | ||
244 | /* As there is only one thread we need to over-ride the | ||
245 | * default maximum of 80 connections | ||
246 | */ | ||
247 | serv->sv_maxconn = 1024; | ||
248 | 215 | ||
249 | ret = svc_bind(serv, net); | 216 | if (cb_info->task) |
250 | if (ret < 0) { | 217 | return 0; |
251 | printk(KERN_WARNING "NFS: bind callback service failed\n"); | ||
252 | goto out_err; | ||
253 | } | ||
254 | 218 | ||
255 | minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, | 219 | switch (minorversion) { |
256 | serv, xprt, &rqstp, &callback_svc); | 220 | case 0: |
257 | if (!minorversion_setup) { | ||
258 | /* v4.0 callback setup */ | 221 | /* v4.0 callback setup */ |
259 | rqstp = nfs4_callback_up(serv, xprt); | 222 | rqstp = nfs4_callback_up(serv); |
260 | callback_svc = nfs4_callback_svc; | 223 | callback_svc = nfs4_callback_svc; |
224 | break; | ||
225 | default: | ||
226 | nfs_minorversion_callback_svc_setup(serv, | ||
227 | &rqstp, &callback_svc); | ||
261 | } | 228 | } |
262 | 229 | ||
263 | if (IS_ERR(rqstp)) { | 230 | if (IS_ERR(rqstp)) |
264 | ret = PTR_ERR(rqstp); | 231 | return PTR_ERR(rqstp); |
265 | goto out_err; | ||
266 | } | ||
267 | 232 | ||
268 | svc_sock_update_bufs(serv); | 233 | svc_sock_update_bufs(serv); |
269 | 234 | ||
@@ -276,41 +241,165 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) | |||
276 | svc_exit_thread(cb_info->rqst); | 241 | svc_exit_thread(cb_info->rqst); |
277 | cb_info->rqst = NULL; | 242 | cb_info->rqst = NULL; |
278 | cb_info->task = NULL; | 243 | cb_info->task = NULL; |
279 | goto out_err; | 244 | return ret; /* task was just cleared, so PTR_ERR(cb_info->task) would be 0 */ |
245 | } | ||
246 | dprintk("nfs_callback_up: service started\n"); | ||
247 | return 0; | ||
248 | } | ||
249 | |||
250 | static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struct net *net) | ||
251 | { | ||
252 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
253 | |||
254 | if (--nn->cb_users[minorversion]) | ||
255 | return; | ||
256 | |||
257 | dprintk("NFS: destroy per-net callback data; net=%p\n", net); | ||
258 | svc_shutdown_net(serv, net); | ||
259 | } | ||
260 | |||
261 | static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net) | ||
262 | { | ||
263 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
264 | int ret; | ||
265 | |||
266 | if (nn->cb_users[minorversion]++) | ||
267 | return 0; | ||
268 | |||
269 | dprintk("NFS: create per-net callback data; net=%p\n", net); | ||
270 | |||
271 | ret = svc_bind(serv, net); | ||
272 | if (ret < 0) { | ||
273 | printk(KERN_WARNING "NFS: bind callback service failed\n"); | ||
274 | goto err_bind; | ||
275 | } | ||
276 | |||
277 | switch (minorversion) { | ||
278 | case 0: | ||
279 | ret = nfs4_callback_up_net(serv, net); | ||
280 | break; | ||
281 | case 1: | ||
282 | ret = nfs41_callback_up_net(serv, net); | ||
283 | break; | ||
284 | default: | ||
285 | printk(KERN_ERR "NFS: unknown callback version: %d\n", | ||
286 | minorversion); | ||
287 | ret = -EINVAL; | ||
288 | break; | ||
289 | } | ||
290 | |||
291 | if (ret < 0) { | ||
292 | printk(KERN_ERR "NFS: callback service start failed\n"); | ||
293 | goto err_socks; | ||
294 | } | ||
295 | return 0; | ||
296 | |||
297 | err_socks: | ||
298 | svc_rpcb_cleanup(serv, net); | ||
299 | err_bind: | ||
300 | dprintk("NFS: Couldn't create callback socket: err = %d; " | ||
301 | "net = %p\n", ret, net); | ||
302 | return ret; | ||
303 | } | ||
304 | |||
305 | static struct svc_serv *nfs_callback_create_svc(int minorversion) | ||
306 | { | ||
307 | struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; | ||
308 | struct svc_serv *serv; | ||
309 | |||
310 | /* | ||
311 | * Check whether we're already up and running. | ||
312 | */ | ||
313 | if (cb_info->task) { | ||
314 | /* | ||
315 | * Note: increase service usage, because later in case of error | ||
316 | * svc_destroy() will be called. | ||
317 | */ | ||
318 | svc_get(cb_info->serv); | ||
319 | return cb_info->serv; | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * Sanity check: if there's no task, | ||
324 | * we should be the first user ... | ||
325 | */ | ||
326 | if (cb_info->users) | ||
327 | printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n", | ||
328 | cb_info->users); | ||
329 | |||
330 | serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); | ||
331 | if (!serv) { | ||
332 | printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); | ||
333 | return ERR_PTR(-ENOMEM); | ||
334 | } | ||
335 | /* As there is only one thread we need to over-ride the | ||
336 | * default maximum of 80 connections | ||
337 | */ | ||
338 | serv->sv_maxconn = 1024; | ||
339 | dprintk("nfs_callback_create_svc: service created\n"); | ||
340 | return serv; | ||
341 | } | ||
342 | |||
343 | /* | ||
344 | * Bring up the callback thread if it is not already up. | ||
345 | */ | ||
346 | int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) | ||
347 | { | ||
348 | struct svc_serv *serv; | ||
349 | struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; | ||
350 | int ret; | ||
351 | struct net *net = xprt->xprt_net; | ||
352 | |||
353 | mutex_lock(&nfs_callback_mutex); | ||
354 | |||
355 | serv = nfs_callback_create_svc(minorversion); | ||
356 | if (IS_ERR(serv)) { | ||
357 | ret = PTR_ERR(serv); | ||
358 | goto err_create; | ||
280 | } | 359 | } |
281 | out: | 360 | |
361 | ret = nfs_callback_up_net(minorversion, serv, net); | ||
362 | if (ret < 0) | ||
363 | goto err_net; | ||
364 | |||
365 | ret = nfs_callback_start_svc(minorversion, xprt, serv); | ||
366 | if (ret < 0) | ||
367 | goto err_start; | ||
368 | |||
369 | cb_info->users++; | ||
282 | /* | 370 | /* |
283 | * svc_create creates the svc_serv with sv_nrthreads == 1, and then | 371 | * svc_create creates the svc_serv with sv_nrthreads == 1, and then |
284 | * svc_prepare_thread increments that. So we need to call svc_destroy | 372 | * svc_prepare_thread increments that. So we need to call svc_destroy |
285 | * on both success and failure so that the refcount is 1 when the | 373 | * on both success and failure so that the refcount is 1 when the |
286 | * thread exits. | 374 | * thread exits. |
287 | */ | 375 | */ |
288 | if (serv) | 376 | err_net: |
289 | svc_destroy(serv); | 377 | svc_destroy(serv); |
378 | err_create: | ||
290 | mutex_unlock(&nfs_callback_mutex); | 379 | mutex_unlock(&nfs_callback_mutex); |
291 | return ret; | 380 | return ret; |
292 | out_err: | 381 | |
293 | dprintk("NFS: Couldn't create callback socket or server thread; " | 382 | err_start: |
294 | "err = %d\n", ret); | 383 | nfs_callback_down_net(minorversion, serv, net); |
295 | cb_info->users--; | 384 | dprintk("NFS: Couldn't create server thread; err = %d\n", ret); |
296 | if (serv) | 385 | goto err_net; |
297 | svc_shutdown_net(serv, net); | ||
298 | goto out; | ||
299 | } | 386 | } |
300 | 387 | ||
301 | /* | 388 | /* |
302 | * Kill the callback thread if it's no longer being used. | 389 | * Kill the callback thread if it's no longer being used. |
303 | */ | 390 | */ |
304 | void nfs_callback_down(int minorversion) | 391 | void nfs_callback_down(int minorversion, struct net *net) |
305 | { | 392 | { |
306 | struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; | 393 | struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; |
307 | 394 | ||
308 | mutex_lock(&nfs_callback_mutex); | 395 | mutex_lock(&nfs_callback_mutex); |
396 | nfs_callback_down_net(minorversion, cb_info->serv, net); | ||
309 | cb_info->users--; | 397 | cb_info->users--; |
310 | if (cb_info->users == 0 && cb_info->task != NULL) { | 398 | if (cb_info->users == 0 && cb_info->task != NULL) { |
311 | kthread_stop(cb_info->task); | 399 | kthread_stop(cb_info->task); |
312 | svc_shutdown_net(cb_info->serv, &init_net); | 400 | dprintk("nfs_callback_down: service stopped\n"); |
313 | svc_exit_thread(cb_info->rqst); | 401 | svc_exit_thread(cb_info->rqst); |
402 | dprintk("nfs_callback_down: service destroyed\n"); | ||
314 | cb_info->serv = NULL; | 403 | cb_info->serv = NULL; |
315 | cb_info->rqst = NULL; | 404 | cb_info->rqst = NULL; |
316 | cb_info->task = NULL; | 405 | cb_info->task = NULL; |
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index b44d7b128b71..4251c2ae06ad 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h | |||
@@ -194,7 +194,7 @@ extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, | |||
194 | struct cb_process_state *cps); | 194 | struct cb_process_state *cps); |
195 | #if IS_ENABLED(CONFIG_NFS_V4) | 195 | #if IS_ENABLED(CONFIG_NFS_V4) |
196 | extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); | 196 | extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); |
197 | extern void nfs_callback_down(int minorversion); | 197 | extern void nfs_callback_down(int minorversion, struct net *net); |
198 | extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, | 198 | extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, |
199 | const nfs4_stateid *stateid); | 199 | const nfs4_stateid *stateid); |
200 | extern int nfs4_set_callback_sessionid(struct nfs_client *clp); | 200 | extern int nfs4_set_callback_sessionid(struct nfs_client *clp); |
@@ -209,6 +209,5 @@ extern int nfs4_set_callback_sessionid(struct nfs_client *clp); | |||
209 | 209 | ||
210 | extern unsigned int nfs_callback_set_tcpport; | 210 | extern unsigned int nfs_callback_set_tcpport; |
211 | extern unsigned short nfs_callback_tcpport; | 211 | extern unsigned short nfs_callback_tcpport; |
212 | extern unsigned short nfs_callback_tcpport6; | ||
213 | 212 | ||
214 | #endif /* __LINUX_FS_NFS_CALLBACK_H */ | 213 | #endif /* __LINUX_FS_NFS_CALLBACK_H */ |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 1b5d809a105e..76b4a7a3e559 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -122,7 +122,15 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, | |||
122 | ino = igrab(lo->plh_inode); | 122 | ino = igrab(lo->plh_inode); |
123 | if (!ino) | 123 | if (!ino) |
124 | continue; | 124 | continue; |
125 | get_layout_hdr(lo); | 125 | spin_lock(&ino->i_lock); |
126 | /* Is this layout in the process of being freed? */ | ||
127 | if (NFS_I(ino)->layout != lo) { | ||
128 | spin_unlock(&ino->i_lock); | ||
129 | iput(ino); | ||
130 | continue; | ||
131 | } | ||
132 | pnfs_get_layout_hdr(lo); | ||
133 | spin_unlock(&ino->i_lock); | ||
126 | return lo; | 134 | return lo; |
127 | } | 135 | } |
128 | } | 136 | } |
@@ -158,7 +166,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
158 | ino = lo->plh_inode; | 166 | ino = lo->plh_inode; |
159 | spin_lock(&ino->i_lock); | 167 | spin_lock(&ino->i_lock); |
160 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || | 168 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || |
161 | mark_matching_lsegs_invalid(lo, &free_me_list, | 169 | pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, |
162 | &args->cbl_range)) | 170 | &args->cbl_range)) |
163 | rv = NFS4ERR_DELAY; | 171 | rv = NFS4ERR_DELAY; |
164 | else | 172 | else |
@@ -166,7 +174,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
166 | pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); | 174 | pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); |
167 | spin_unlock(&ino->i_lock); | 175 | spin_unlock(&ino->i_lock); |
168 | pnfs_free_lseg_list(&free_me_list); | 176 | pnfs_free_lseg_list(&free_me_list); |
169 | put_layout_hdr(lo); | 177 | pnfs_put_layout_hdr(lo); |
170 | iput(ino); | 178 | iput(ino); |
171 | return rv; | 179 | return rv; |
172 | } | 180 | } |
@@ -196,9 +204,18 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, | |||
196 | continue; | 204 | continue; |
197 | 205 | ||
198 | list_for_each_entry(lo, &server->layouts, plh_layouts) { | 206 | list_for_each_entry(lo, &server->layouts, plh_layouts) { |
199 | if (!igrab(lo->plh_inode)) | 207 | ino = igrab(lo->plh_inode); |
208 | if (!ino) /* no inode reference obtained; ino is dereferenced below */ | ||
209 | continue; | ||
210 | spin_lock(&ino->i_lock); | ||
211 | /* Is this layout in the process of being freed? */ | ||
212 | if (NFS_I(ino)->layout != lo) { | ||
213 | spin_unlock(&ino->i_lock); | ||
214 | iput(ino); | ||
200 | continue; | 215 | continue; |
201 | get_layout_hdr(lo); | 216 | } |
217 | pnfs_get_layout_hdr(lo); | ||
218 | spin_unlock(&ino->i_lock); | ||
202 | BUG_ON(!list_empty(&lo->plh_bulk_recall)); | 219 | BUG_ON(!list_empty(&lo->plh_bulk_recall)); |
203 | list_add(&lo->plh_bulk_recall, &recall_list); | 220 | list_add(&lo->plh_bulk_recall, &recall_list); |
204 | } | 221 | } |
@@ -211,12 +228,12 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, | |||
211 | ino = lo->plh_inode; | 228 | ino = lo->plh_inode; |
212 | spin_lock(&ino->i_lock); | 229 | spin_lock(&ino->i_lock); |
213 | set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); | 230 | set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); |
214 | if (mark_matching_lsegs_invalid(lo, &free_me_list, &range)) | 231 | if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range)) |
215 | rv = NFS4ERR_DELAY; | 232 | rv = NFS4ERR_DELAY; |
216 | list_del_init(&lo->plh_bulk_recall); | 233 | list_del_init(&lo->plh_bulk_recall); |
217 | spin_unlock(&ino->i_lock); | 234 | spin_unlock(&ino->i_lock); |
218 | pnfs_free_lseg_list(&free_me_list); | 235 | pnfs_free_lseg_list(&free_me_list); |
219 | put_layout_hdr(lo); | 236 | pnfs_put_layout_hdr(lo); |
220 | iput(ino); | 237 | iput(ino); |
221 | } | 238 | } |
222 | return rv; | 239 | return rv; |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 99694442b93f..8b39a42ac35e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -93,10 +93,10 @@ static struct nfs_subversion *find_nfs_version(unsigned int version) | |||
93 | spin_unlock(&nfs_version_lock); | 93 | spin_unlock(&nfs_version_lock); |
94 | return nfs; | 94 | return nfs; |
95 | } | 95 | } |
96 | }; | 96 | } |
97 | 97 | ||
98 | spin_unlock(&nfs_version_lock); | 98 | spin_unlock(&nfs_version_lock); |
99 | return ERR_PTR(-EPROTONOSUPPORT);; | 99 | return ERR_PTR(-EPROTONOSUPPORT); |
100 | } | 100 | } |
101 | 101 | ||
102 | struct nfs_subversion *get_nfs_version(unsigned int version) | 102 | struct nfs_subversion *get_nfs_version(unsigned int version) |
@@ -498,7 +498,8 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, | |||
498 | return nfs_found_client(cl_init, clp); | 498 | return nfs_found_client(cl_init, clp); |
499 | } | 499 | } |
500 | if (new) { | 500 | if (new) { |
501 | list_add(&new->cl_share_link, &nn->nfs_client_list); | 501 | list_add_tail(&new->cl_share_link, |
502 | &nn->nfs_client_list); | ||
502 | spin_unlock(&nn->nfs_client_lock); | 503 | spin_unlock(&nn->nfs_client_lock); |
503 | new->cl_flags = cl_init->init_flags; | 504 | new->cl_flags = cl_init->init_flags; |
504 | return rpc_ops->init_client(new, timeparms, ip_addr, | 505 | return rpc_ops->init_client(new, timeparms, ip_addr, |
@@ -668,7 +669,8 @@ int nfs_init_server_rpcclient(struct nfs_server *server, | |||
668 | { | 669 | { |
669 | struct nfs_client *clp = server->nfs_client; | 670 | struct nfs_client *clp = server->nfs_client; |
670 | 671 | ||
671 | server->client = rpc_clone_client(clp->cl_rpcclient); | 672 | server->client = rpc_clone_client_set_auth(clp->cl_rpcclient, |
673 | pseudoflavour); | ||
672 | if (IS_ERR(server->client)) { | 674 | if (IS_ERR(server->client)) { |
673 | dprintk("%s: couldn't create rpc_client!\n", __func__); | 675 | dprintk("%s: couldn't create rpc_client!\n", __func__); |
674 | return PTR_ERR(server->client); | 676 | return PTR_ERR(server->client); |
@@ -678,16 +680,6 @@ int nfs_init_server_rpcclient(struct nfs_server *server, | |||
678 | timeo, | 680 | timeo, |
679 | sizeof(server->client->cl_timeout_default)); | 681 | sizeof(server->client->cl_timeout_default)); |
680 | server->client->cl_timeout = &server->client->cl_timeout_default; | 682 | server->client->cl_timeout = &server->client->cl_timeout_default; |
681 | |||
682 | if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) { | ||
683 | struct rpc_auth *auth; | ||
684 | |||
685 | auth = rpcauth_create(pseudoflavour, server->client); | ||
686 | if (IS_ERR(auth)) { | ||
687 | dprintk("%s: couldn't create credcache!\n", __func__); | ||
688 | return PTR_ERR(auth); | ||
689 | } | ||
690 | } | ||
691 | server->client->cl_softrtry = 0; | 683 | server->client->cl_softrtry = 0; |
692 | if (server->flags & NFS_MOUNT_SOFT) | 684 | if (server->flags & NFS_MOUNT_SOFT) |
693 | server->client->cl_softrtry = 1; | 685 | server->client->cl_softrtry = 1; |
@@ -761,6 +753,8 @@ static int nfs_init_server(struct nfs_server *server, | |||
761 | data->timeo, data->retrans); | 753 | data->timeo, data->retrans); |
762 | if (data->flags & NFS_MOUNT_NORESVPORT) | 754 | if (data->flags & NFS_MOUNT_NORESVPORT) |
763 | set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); | 755 | set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); |
756 | if (server->options & NFS_OPTION_MIGRATION) | ||
757 | set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); | ||
764 | 758 | ||
765 | /* Allocate or find a client reference we can use */ | 759 | /* Allocate or find a client reference we can use */ |
766 | clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX); | 760 | clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX); |
@@ -855,7 +849,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, | |||
855 | if (server->wsize > NFS_MAX_FILE_IO_SIZE) | 849 | if (server->wsize > NFS_MAX_FILE_IO_SIZE) |
856 | server->wsize = NFS_MAX_FILE_IO_SIZE; | 850 | server->wsize = NFS_MAX_FILE_IO_SIZE; |
857 | server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 851 | server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
858 | server->pnfs_blksize = fsinfo->blksize; | ||
859 | 852 | ||
860 | server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); | 853 | server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); |
861 | 854 | ||
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 627f108ede23..ce8cb926526b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -2072,7 +2072,7 @@ found: | |||
2072 | nfs_access_free_entry(entry); | 2072 | nfs_access_free_entry(entry); |
2073 | } | 2073 | } |
2074 | 2074 | ||
2075 | static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) | 2075 | void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) |
2076 | { | 2076 | { |
2077 | struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); | 2077 | struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); |
2078 | if (cache == NULL) | 2078 | if (cache == NULL) |
@@ -2098,6 +2098,20 @@ static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *s | |||
2098 | spin_unlock(&nfs_access_lru_lock); | 2098 | spin_unlock(&nfs_access_lru_lock); |
2099 | } | 2099 | } |
2100 | } | 2100 | } |
2101 | EXPORT_SYMBOL_GPL(nfs_access_add_cache); | ||
2102 | |||
2103 | void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result) | ||
2104 | { | ||
2105 | entry->mask = 0; | ||
2106 | if (access_result & NFS4_ACCESS_READ) | ||
2107 | entry->mask |= MAY_READ; | ||
2108 | if (access_result & | ||
2109 | (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) | ||
2110 | entry->mask |= MAY_WRITE; | ||
2111 | if (access_result & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) | ||
2112 | entry->mask |= MAY_EXEC; | ||
2113 | } | ||
2114 | EXPORT_SYMBOL_GPL(nfs_access_set_mask); | ||
2101 | 2115 | ||
2102 | static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) | 2116 | static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) |
2103 | { | 2117 | { |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1ba385b7c90d..cae26cbd59ee 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <linux/kref.h> | 46 | #include <linux/kref.h> |
47 | #include <linux/slab.h> | 47 | #include <linux/slab.h> |
48 | #include <linux/task_io_accounting_ops.h> | 48 | #include <linux/task_io_accounting_ops.h> |
49 | #include <linux/module.h> | ||
49 | 50 | ||
50 | #include <linux/nfs_fs.h> | 51 | #include <linux/nfs_fs.h> |
51 | #include <linux/nfs_page.h> | 52 | #include <linux/nfs_page.h> |
@@ -78,6 +79,7 @@ struct nfs_direct_req { | |||
78 | atomic_t io_count; /* i/os we're waiting for */ | 79 | atomic_t io_count; /* i/os we're waiting for */ |
79 | spinlock_t lock; /* protect completion state */ | 80 | spinlock_t lock; /* protect completion state */ |
80 | ssize_t count, /* bytes actually processed */ | 81 | ssize_t count, /* bytes actually processed */ |
82 | bytes_left, /* bytes left to be sent */ | ||
81 | error; /* any reported error */ | 83 | error; /* any reported error */ |
82 | struct completion completion; /* wait for i/o completion */ | 84 | struct completion completion; /* wait for i/o completion */ |
83 | 85 | ||
@@ -190,6 +192,12 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq) | |||
190 | kref_put(&dreq->kref, nfs_direct_req_free); | 192 | kref_put(&dreq->kref, nfs_direct_req_free); |
191 | } | 193 | } |
192 | 194 | ||
195 | ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq) | ||
196 | { | ||
197 | return dreq->bytes_left; | ||
198 | } | ||
199 | EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); | ||
200 | |||
193 | /* | 201 | /* |
194 | * Collects and returns the final error value/byte-count. | 202 | * Collects and returns the final error value/byte-count. |
195 | */ | 203 | */ |
@@ -390,6 +398,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de | |||
390 | user_addr += req_len; | 398 | user_addr += req_len; |
391 | pos += req_len; | 399 | pos += req_len; |
392 | count -= req_len; | 400 | count -= req_len; |
401 | dreq->bytes_left -= req_len; | ||
393 | } | 402 | } |
394 | /* The nfs_page now hold references to these pages */ | 403 | /* The nfs_page now hold references to these pages */ |
395 | nfs_direct_release_pages(pagevec, npages); | 404 | nfs_direct_release_pages(pagevec, npages); |
@@ -450,23 +459,28 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, | |||
450 | ssize_t result = -ENOMEM; | 459 | ssize_t result = -ENOMEM; |
451 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 460 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
452 | struct nfs_direct_req *dreq; | 461 | struct nfs_direct_req *dreq; |
462 | struct nfs_lock_context *l_ctx; | ||
453 | 463 | ||
454 | dreq = nfs_direct_req_alloc(); | 464 | dreq = nfs_direct_req_alloc(); |
455 | if (dreq == NULL) | 465 | if (dreq == NULL) |
456 | goto out; | 466 | goto out; |
457 | 467 | ||
458 | dreq->inode = inode; | 468 | dreq->inode = inode; |
469 | dreq->bytes_left = iov_length(iov, nr_segs); | ||
459 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); | 470 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); |
460 | dreq->l_ctx = nfs_get_lock_context(dreq->ctx); | 471 | l_ctx = nfs_get_lock_context(dreq->ctx); |
461 | if (dreq->l_ctx == NULL) | 472 | if (IS_ERR(l_ctx)) { |
473 | result = PTR_ERR(l_ctx); | ||
462 | goto out_release; | 474 | goto out_release; |
475 | } | ||
476 | dreq->l_ctx = l_ctx; | ||
463 | if (!is_sync_kiocb(iocb)) | 477 | if (!is_sync_kiocb(iocb)) |
464 | dreq->iocb = iocb; | 478 | dreq->iocb = iocb; |
465 | 479 | ||
480 | NFS_I(inode)->read_io += iov_length(iov, nr_segs); | ||
466 | result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); | 481 | result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); |
467 | if (!result) | 482 | if (!result) |
468 | result = nfs_direct_wait(dreq); | 483 | result = nfs_direct_wait(dreq); |
469 | NFS_I(inode)->read_io += result; | ||
470 | out_release: | 484 | out_release: |
471 | nfs_direct_req_release(dreq); | 485 | nfs_direct_req_release(dreq); |
472 | out: | 486 | out: |
@@ -706,6 +720,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d | |||
706 | user_addr += req_len; | 720 | user_addr += req_len; |
707 | pos += req_len; | 721 | pos += req_len; |
708 | count -= req_len; | 722 | count -= req_len; |
723 | dreq->bytes_left -= req_len; | ||
709 | } | 724 | } |
710 | /* The nfs_page now hold references to these pages */ | 725 | /* The nfs_page now hold references to these pages */ |
711 | nfs_direct_release_pages(pagevec, npages); | 726 | nfs_direct_release_pages(pagevec, npages); |
@@ -814,6 +829,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, | |||
814 | get_dreq(dreq); | 829 | get_dreq(dreq); |
815 | atomic_inc(&inode->i_dio_count); | 830 | atomic_inc(&inode->i_dio_count); |
816 | 831 | ||
832 | NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs); | ||
817 | for (seg = 0; seg < nr_segs; seg++) { | 833 | for (seg = 0; seg < nr_segs; seg++) { |
818 | const struct iovec *vec = &iov[seg]; | 834 | const struct iovec *vec = &iov[seg]; |
819 | result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); | 835 | result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); |
@@ -825,7 +841,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, | |||
825 | pos += vec->iov_len; | 841 | pos += vec->iov_len; |
826 | } | 842 | } |
827 | nfs_pageio_complete(&desc); | 843 | nfs_pageio_complete(&desc); |
828 | NFS_I(dreq->inode)->write_io += desc.pg_bytes_written; | ||
829 | 844 | ||
830 | /* | 845 | /* |
831 | * If no bytes were started, return the error, and let the | 846 | * If no bytes were started, return the error, and let the |
@@ -849,16 +864,21 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
849 | ssize_t result = -ENOMEM; | 864 | ssize_t result = -ENOMEM; |
850 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 865 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
851 | struct nfs_direct_req *dreq; | 866 | struct nfs_direct_req *dreq; |
867 | struct nfs_lock_context *l_ctx; | ||
852 | 868 | ||
853 | dreq = nfs_direct_req_alloc(); | 869 | dreq = nfs_direct_req_alloc(); |
854 | if (!dreq) | 870 | if (!dreq) |
855 | goto out; | 871 | goto out; |
856 | 872 | ||
857 | dreq->inode = inode; | 873 | dreq->inode = inode; |
874 | dreq->bytes_left = count; | ||
858 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); | 875 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); |
859 | dreq->l_ctx = nfs_get_lock_context(dreq->ctx); | 876 | l_ctx = nfs_get_lock_context(dreq->ctx); |
860 | if (dreq->l_ctx == NULL) | 877 | if (IS_ERR(l_ctx)) { |
878 | result = PTR_ERR(l_ctx); | ||
861 | goto out_release; | 879 | goto out_release; |
880 | } | ||
881 | dreq->l_ctx = l_ctx; | ||
862 | if (!is_sync_kiocb(iocb)) | 882 | if (!is_sync_kiocb(iocb)) |
863 | dreq->iocb = iocb; | 883 | dreq->iocb = iocb; |
864 | 884 | ||
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f692be97676d..582bb8866131 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -259,7 +259,7 @@ nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) | |||
259 | struct dentry *dentry = file->f_path.dentry; | 259 | struct dentry *dentry = file->f_path.dentry; |
260 | struct nfs_open_context *ctx = nfs_file_open_context(file); | 260 | struct nfs_open_context *ctx = nfs_file_open_context(file); |
261 | struct inode *inode = dentry->d_inode; | 261 | struct inode *inode = dentry->d_inode; |
262 | int have_error, status; | 262 | int have_error, do_resend, status; |
263 | int ret = 0; | 263 | int ret = 0; |
264 | 264 | ||
265 | dprintk("NFS: fsync file(%s/%s) datasync %d\n", | 265 | dprintk("NFS: fsync file(%s/%s) datasync %d\n", |
@@ -267,15 +267,23 @@ nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) | |||
267 | datasync); | 267 | datasync); |
268 | 268 | ||
269 | nfs_inc_stats(inode, NFSIOS_VFSFSYNC); | 269 | nfs_inc_stats(inode, NFSIOS_VFSFSYNC); |
270 | do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); | ||
270 | have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); | 271 | have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); |
271 | status = nfs_commit_inode(inode, FLUSH_SYNC); | 272 | status = nfs_commit_inode(inode, FLUSH_SYNC); |
272 | if (status >= 0 && ret < 0) | ||
273 | status = ret; | ||
274 | have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); | 273 | have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); |
275 | if (have_error) | 274 | if (have_error) { |
276 | ret = xchg(&ctx->error, 0); | 275 | ret = xchg(&ctx->error, 0); |
277 | if (!ret && status < 0) | 276 | if (ret) |
277 | goto out; | ||
278 | } | ||
279 | if (status < 0) { | ||
278 | ret = status; | 280 | ret = status; |
281 | goto out; | ||
282 | } | ||
283 | do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); | ||
284 | if (do_resend) | ||
285 | ret = -EAGAIN; | ||
286 | out: | ||
279 | return ret; | 287 | return ret; |
280 | } | 288 | } |
281 | EXPORT_SYMBOL_GPL(nfs_file_fsync_commit); | 289 | EXPORT_SYMBOL_GPL(nfs_file_fsync_commit); |
@@ -286,13 +294,22 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
286 | int ret; | 294 | int ret; |
287 | struct inode *inode = file->f_path.dentry->d_inode; | 295 | struct inode *inode = file->f_path.dentry->d_inode; |
288 | 296 | ||
289 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 297 | do { |
290 | if (ret != 0) | 298 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
291 | goto out; | 299 | if (ret != 0) |
292 | mutex_lock(&inode->i_mutex); | 300 | break; |
293 | ret = nfs_file_fsync_commit(file, start, end, datasync); | 301 | mutex_lock(&inode->i_mutex); |
294 | mutex_unlock(&inode->i_mutex); | 302 | ret = nfs_file_fsync_commit(file, start, end, datasync); |
295 | out: | 303 | mutex_unlock(&inode->i_mutex); |
304 | /* | ||
305 | * If nfs_file_fsync_commit detected a server reboot, then | ||
306 | * resend all dirty pages that might have been covered by | ||
307 | * the NFS_CONTEXT_RESEND_WRITES flag | ||
308 | */ | ||
309 | start = 0; | ||
310 | end = LLONG_MAX; | ||
311 | } while (ret == -EAGAIN); | ||
312 | |||
296 | return ret; | 313 | return ret; |
297 | } | 314 | } |
298 | 315 | ||
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 4654ced096a6..033803c36644 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c | |||
@@ -32,6 +32,8 @@ | |||
32 | 32 | ||
33 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
34 | 34 | ||
35 | #include "internal.h" | ||
36 | |||
35 | #define NFSDBG_FACILITY NFSDBG_CLIENT | 37 | #define NFSDBG_FACILITY NFSDBG_CLIENT |
36 | 38 | ||
37 | /* | 39 | /* |
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index a850079467d8..9cc4a3fbf4b0 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c | |||
@@ -55,18 +55,19 @@ | |||
55 | static const struct cred *id_resolver_cache; | 55 | static const struct cred *id_resolver_cache; |
56 | static struct key_type key_type_id_resolver_legacy; | 56 | static struct key_type key_type_id_resolver_legacy; |
57 | 57 | ||
58 | struct idmap { | ||
59 | struct rpc_pipe *idmap_pipe; | ||
60 | struct key_construction *idmap_key_cons; | ||
61 | struct mutex idmap_mutex; | ||
62 | }; | ||
63 | |||
64 | struct idmap_legacy_upcalldata { | 58 | struct idmap_legacy_upcalldata { |
65 | struct rpc_pipe_msg pipe_msg; | 59 | struct rpc_pipe_msg pipe_msg; |
66 | struct idmap_msg idmap_msg; | 60 | struct idmap_msg idmap_msg; |
61 | struct key_construction *key_cons; | ||
67 | struct idmap *idmap; | 62 | struct idmap *idmap; |
68 | }; | 63 | }; |
69 | 64 | ||
65 | struct idmap { | ||
66 | struct rpc_pipe *idmap_pipe; | ||
67 | struct idmap_legacy_upcalldata *idmap_upcall_data; | ||
68 | struct mutex idmap_mutex; | ||
69 | }; | ||
70 | |||
70 | /** | 71 | /** |
71 | * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields | 72 | * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields |
72 | * @fattr: fully initialised struct nfs_fattr | 73 | * @fattr: fully initialised struct nfs_fattr |
@@ -158,7 +159,7 @@ static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *re | |||
158 | return 0; | 159 | return 0; |
159 | memcpy(buf, name, namelen); | 160 | memcpy(buf, name, namelen); |
160 | buf[namelen] = '\0'; | 161 | buf[namelen] = '\0'; |
161 | if (strict_strtoul(buf, 0, &val) != 0) | 162 | if (kstrtoul(buf, 0, &val) != 0) |
162 | return 0; | 163 | return 0; |
163 | *res = val; | 164 | *res = val; |
164 | return 1; | 165 | return 1; |
@@ -330,7 +331,6 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, | |||
330 | ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, | 331 | ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, |
331 | name, namelen, type, data, | 332 | name, namelen, type, data, |
332 | data_size, idmap); | 333 | data_size, idmap); |
333 | idmap->idmap_key_cons = NULL; | ||
334 | mutex_unlock(&idmap->idmap_mutex); | 334 | mutex_unlock(&idmap->idmap_mutex); |
335 | } | 335 | } |
336 | return ret; | 336 | return ret; |
@@ -364,7 +364,7 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ | |||
364 | if (data_size <= 0) { | 364 | if (data_size <= 0) { |
365 | ret = -EINVAL; | 365 | ret = -EINVAL; |
366 | } else { | 366 | } else { |
367 | ret = strict_strtol(id_str, 10, &id_long); | 367 | ret = kstrtol(id_str, 10, &id_long); |
368 | *id = (__u32)id_long; | 368 | *id = (__u32)id_long; |
369 | } | 369 | } |
370 | return ret; | 370 | return ret; |
@@ -465,8 +465,6 @@ nfs_idmap_new(struct nfs_client *clp) | |||
465 | struct rpc_pipe *pipe; | 465 | struct rpc_pipe *pipe; |
466 | int error; | 466 | int error; |
467 | 467 | ||
468 | BUG_ON(clp->cl_idmap != NULL); | ||
469 | |||
470 | idmap = kzalloc(sizeof(*idmap), GFP_KERNEL); | 468 | idmap = kzalloc(sizeof(*idmap), GFP_KERNEL); |
471 | if (idmap == NULL) | 469 | if (idmap == NULL) |
472 | return -ENOMEM; | 470 | return -ENOMEM; |
@@ -510,7 +508,6 @@ static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event, | |||
510 | 508 | ||
511 | switch (event) { | 509 | switch (event) { |
512 | case RPC_PIPEFS_MOUNT: | 510 | case RPC_PIPEFS_MOUNT: |
513 | BUG_ON(clp->cl_rpcclient->cl_dentry == NULL); | ||
514 | err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, | 511 | err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, |
515 | clp->cl_idmap, | 512 | clp->cl_idmap, |
516 | clp->cl_idmap->idmap_pipe); | 513 | clp->cl_idmap->idmap_pipe); |
@@ -632,9 +629,6 @@ static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap, | |||
632 | substring_t substr; | 629 | substring_t substr; |
633 | int token, ret; | 630 | int token, ret; |
634 | 631 | ||
635 | memset(im, 0, sizeof(*im)); | ||
636 | memset(msg, 0, sizeof(*msg)); | ||
637 | |||
638 | im->im_type = IDMAP_TYPE_GROUP; | 632 | im->im_type = IDMAP_TYPE_GROUP; |
639 | token = match_token(desc, nfs_idmap_tokens, &substr); | 633 | token = match_token(desc, nfs_idmap_tokens, &substr); |
640 | 634 | ||
@@ -665,6 +659,35 @@ out: | |||
665 | return ret; | 659 | return ret; |
666 | } | 660 | } |
667 | 661 | ||
662 | static bool | ||
663 | nfs_idmap_prepare_pipe_upcall(struct idmap *idmap, | ||
664 | struct idmap_legacy_upcalldata *data) | ||
665 | { | ||
666 | if (idmap->idmap_upcall_data != NULL) { | ||
667 | WARN_ON_ONCE(1); | ||
668 | return false; | ||
669 | } | ||
670 | idmap->idmap_upcall_data = data; | ||
671 | return true; | ||
672 | } | ||
673 | |||
674 | static void | ||
675 | nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret) | ||
676 | { | ||
677 | struct key_construction *cons = idmap->idmap_upcall_data->key_cons; | ||
678 | |||
679 | kfree(idmap->idmap_upcall_data); | ||
680 | idmap->idmap_upcall_data = NULL; | ||
681 | complete_request_key(cons, ret); | ||
682 | } | ||
683 | |||
684 | static void | ||
685 | nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret) | ||
686 | { | ||
687 | if (idmap->idmap_upcall_data != NULL) | ||
688 | nfs_idmap_complete_pipe_upcall_locked(idmap, ret); | ||
689 | } | ||
690 | |||
668 | static int nfs_idmap_legacy_upcall(struct key_construction *cons, | 691 | static int nfs_idmap_legacy_upcall(struct key_construction *cons, |
669 | const char *op, | 692 | const char *op, |
670 | void *aux) | 693 | void *aux) |
@@ -677,29 +700,28 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, | |||
677 | int ret = -ENOMEM; | 700 | int ret = -ENOMEM; |
678 | 701 | ||
679 | /* msg and im are freed in idmap_pipe_destroy_msg */ | 702 | /* msg and im are freed in idmap_pipe_destroy_msg */ |
680 | data = kmalloc(sizeof(*data), GFP_KERNEL); | 703 | data = kzalloc(sizeof(*data), GFP_KERNEL); |
681 | if (!data) | 704 | if (!data) |
682 | goto out1; | 705 | goto out1; |
683 | 706 | ||
684 | msg = &data->pipe_msg; | 707 | msg = &data->pipe_msg; |
685 | im = &data->idmap_msg; | 708 | im = &data->idmap_msg; |
686 | data->idmap = idmap; | 709 | data->idmap = idmap; |
710 | data->key_cons = cons; | ||
687 | 711 | ||
688 | ret = nfs_idmap_prepare_message(key->description, idmap, im, msg); | 712 | ret = nfs_idmap_prepare_message(key->description, idmap, im, msg); |
689 | if (ret < 0) | 713 | if (ret < 0) |
690 | goto out2; | 714 | goto out2; |
691 | 715 | ||
692 | BUG_ON(idmap->idmap_key_cons != NULL); | 716 | ret = -EAGAIN; |
693 | idmap->idmap_key_cons = cons; | 717 | if (!nfs_idmap_prepare_pipe_upcall(idmap, data)) |
718 | goto out2; | ||
694 | 719 | ||
695 | ret = rpc_queue_upcall(idmap->idmap_pipe, msg); | 720 | ret = rpc_queue_upcall(idmap->idmap_pipe, msg); |
696 | if (ret < 0) | 721 | if (ret < 0) |
697 | goto out3; | 722 | nfs_idmap_abort_pipe_upcall(idmap, ret); |
698 | 723 | ||
699 | return ret; | 724 | return ret; |
700 | |||
701 | out3: | ||
702 | idmap->idmap_key_cons = NULL; | ||
703 | out2: | 725 | out2: |
704 | kfree(data); | 726 | kfree(data); |
705 | out1: | 727 | out1: |
@@ -714,21 +736,32 @@ static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *dat | |||
714 | authkey); | 736 | authkey); |
715 | } | 737 | } |
716 | 738 | ||
717 | static int nfs_idmap_read_message(struct idmap_msg *im, struct key *key, struct key *authkey) | 739 | static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, |
740 | struct idmap_msg *upcall, | ||
741 | struct key *key, struct key *authkey) | ||
718 | { | 742 | { |
719 | char id_str[NFS_UINT_MAXLEN]; | 743 | char id_str[NFS_UINT_MAXLEN]; |
720 | int ret = -EINVAL; | 744 | int ret = -ENOKEY; |
721 | 745 | ||
746 | /* ret = -ENOKEY */ | ||
747 | if (upcall->im_type != im->im_type || upcall->im_conv != im->im_conv) | ||
748 | goto out; | ||
722 | switch (im->im_conv) { | 749 | switch (im->im_conv) { |
723 | case IDMAP_CONV_NAMETOID: | 750 | case IDMAP_CONV_NAMETOID: |
751 | if (strcmp(upcall->im_name, im->im_name) != 0) | ||
752 | break; | ||
724 | sprintf(id_str, "%d", im->im_id); | 753 | sprintf(id_str, "%d", im->im_id); |
725 | ret = nfs_idmap_instantiate(key, authkey, id_str); | 754 | ret = nfs_idmap_instantiate(key, authkey, id_str); |
726 | break; | 755 | break; |
727 | case IDMAP_CONV_IDTONAME: | 756 | case IDMAP_CONV_IDTONAME: |
757 | if (upcall->im_id != im->im_id) | ||
758 | break; | ||
728 | ret = nfs_idmap_instantiate(key, authkey, im->im_name); | 759 | ret = nfs_idmap_instantiate(key, authkey, im->im_name); |
729 | break; | 760 | break; |
761 | default: | ||
762 | ret = -EINVAL; | ||
730 | } | 763 | } |
731 | 764 | out: | |
732 | return ret; | 765 | return ret; |
733 | } | 766 | } |
734 | 767 | ||
@@ -740,14 +773,16 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) | |||
740 | struct key_construction *cons; | 773 | struct key_construction *cons; |
741 | struct idmap_msg im; | 774 | struct idmap_msg im; |
742 | size_t namelen_in; | 775 | size_t namelen_in; |
743 | int ret; | 776 | int ret = -ENOKEY; |
744 | 777 | ||
745 | /* If instantiation is successful, anyone waiting for key construction | 778 | /* If instantiation is successful, anyone waiting for key construction |
746 | * will have been woken up and someone else may now have used | 779 | * will have been woken up and someone else may now have used |
747 | * idmap_key_cons - so after this point we may no longer touch it. | 780 | * idmap_key_cons - so after this point we may no longer touch it. |
748 | */ | 781 | */ |
749 | cons = ACCESS_ONCE(idmap->idmap_key_cons); | 782 | if (idmap->idmap_upcall_data == NULL) |
750 | idmap->idmap_key_cons = NULL; | 783 | goto out_noupcall; |
784 | |||
785 | cons = idmap->idmap_upcall_data->key_cons; | ||
751 | 786 | ||
752 | if (mlen != sizeof(im)) { | 787 | if (mlen != sizeof(im)) { |
753 | ret = -ENOSPC; | 788 | ret = -ENOSPC; |
@@ -768,16 +803,19 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) | |||
768 | if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) { | 803 | if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) { |
769 | ret = -EINVAL; | 804 | ret = -EINVAL; |
770 | goto out; | 805 | goto out; |
771 | } | 806 | } |
772 | 807 | ||
773 | ret = nfs_idmap_read_message(&im, cons->key, cons->authkey); | 808 | ret = nfs_idmap_read_and_verify_message(&im, |
809 | &idmap->idmap_upcall_data->idmap_msg, | ||
810 | cons->key, cons->authkey); | ||
774 | if (ret >= 0) { | 811 | if (ret >= 0) { |
775 | key_set_timeout(cons->key, nfs_idmap_cache_timeout); | 812 | key_set_timeout(cons->key, nfs_idmap_cache_timeout); |
776 | ret = mlen; | 813 | ret = mlen; |
777 | } | 814 | } |
778 | 815 | ||
779 | out: | 816 | out: |
780 | complete_request_key(cons, ret); | 817 | nfs_idmap_complete_pipe_upcall_locked(idmap, ret); |
818 | out_noupcall: | ||
781 | return ret; | 819 | return ret; |
782 | } | 820 | } |
783 | 821 | ||
@@ -788,14 +826,9 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) | |||
788 | struct idmap_legacy_upcalldata, | 826 | struct idmap_legacy_upcalldata, |
789 | pipe_msg); | 827 | pipe_msg); |
790 | struct idmap *idmap = data->idmap; | 828 | struct idmap *idmap = data->idmap; |
791 | struct key_construction *cons; | 829 | |
792 | if (msg->errno) { | 830 | if (msg->errno) |
793 | cons = ACCESS_ONCE(idmap->idmap_key_cons); | 831 | nfs_idmap_abort_pipe_upcall(idmap, msg->errno); |
794 | idmap->idmap_key_cons = NULL; | ||
795 | complete_request_key(cons, msg->errno); | ||
796 | } | ||
797 | /* Free memory allocated in nfs_idmap_legacy_upcall() */ | ||
798 | kfree(data); | ||
799 | } | 832 | } |
800 | 833 | ||
801 | static void | 834 | static void |
@@ -803,7 +836,8 @@ idmap_release_pipe(struct inode *inode) | |||
803 | { | 836 | { |
804 | struct rpc_inode *rpci = RPC_I(inode); | 837 | struct rpc_inode *rpci = RPC_I(inode); |
805 | struct idmap *idmap = (struct idmap *)rpci->private; | 838 | struct idmap *idmap = (struct idmap *)rpci->private; |
806 | idmap->idmap_key_cons = NULL; | 839 | |
840 | nfs_idmap_abort_pipe_upcall(idmap, -EPIPE); | ||
807 | } | 841 | } |
808 | 842 | ||
809 | int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) | 843 | int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e4c716d374a8..5c7325c5c5e6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -547,8 +547,8 @@ EXPORT_SYMBOL_GPL(nfs_getattr); | |||
547 | static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) | 547 | static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) |
548 | { | 548 | { |
549 | atomic_set(&l_ctx->count, 1); | 549 | atomic_set(&l_ctx->count, 1); |
550 | l_ctx->lockowner = current->files; | 550 | l_ctx->lockowner.l_owner = current->files; |
551 | l_ctx->pid = current->tgid; | 551 | l_ctx->lockowner.l_pid = current->tgid; |
552 | INIT_LIST_HEAD(&l_ctx->list); | 552 | INIT_LIST_HEAD(&l_ctx->list); |
553 | } | 553 | } |
554 | 554 | ||
@@ -557,9 +557,9 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context | |||
557 | struct nfs_lock_context *pos; | 557 | struct nfs_lock_context *pos; |
558 | 558 | ||
559 | list_for_each_entry(pos, &ctx->lock_context.list, list) { | 559 | list_for_each_entry(pos, &ctx->lock_context.list, list) { |
560 | if (pos->lockowner != current->files) | 560 | if (pos->lockowner.l_owner != current->files) |
561 | continue; | 561 | continue; |
562 | if (pos->pid != current->tgid) | 562 | if (pos->lockowner.l_pid != current->tgid) |
563 | continue; | 563 | continue; |
564 | atomic_inc(&pos->count); | 564 | atomic_inc(&pos->count); |
565 | return pos; | 565 | return pos; |
@@ -578,7 +578,7 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) | |||
578 | spin_unlock(&inode->i_lock); | 578 | spin_unlock(&inode->i_lock); |
579 | new = kmalloc(sizeof(*new), GFP_KERNEL); | 579 | new = kmalloc(sizeof(*new), GFP_KERNEL); |
580 | if (new == NULL) | 580 | if (new == NULL) |
581 | return NULL; | 581 | return ERR_PTR(-ENOMEM); |
582 | nfs_init_lock_context(new); | 582 | nfs_init_lock_context(new); |
583 | spin_lock(&inode->i_lock); | 583 | spin_lock(&inode->i_lock); |
584 | res = __nfs_find_lock_context(ctx); | 584 | res = __nfs_find_lock_context(ctx); |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 31fdb03225cd..59b133c5d652 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -101,11 +101,11 @@ struct nfs_client_initdata { | |||
101 | */ | 101 | */ |
102 | struct nfs_parsed_mount_data { | 102 | struct nfs_parsed_mount_data { |
103 | int flags; | 103 | int flags; |
104 | int rsize, wsize; | 104 | unsigned int rsize, wsize; |
105 | int timeo, retrans; | 105 | unsigned int timeo, retrans; |
106 | int acregmin, acregmax, | 106 | unsigned int acregmin, acregmax, |
107 | acdirmin, acdirmax; | 107 | acdirmin, acdirmax; |
108 | int namlen; | 108 | unsigned int namlen; |
109 | unsigned int options; | 109 | unsigned int options; |
110 | unsigned int bsize; | 110 | unsigned int bsize; |
111 | unsigned int auth_flavor_len; | 111 | unsigned int auth_flavor_len; |
@@ -464,6 +464,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) | |||
464 | { | 464 | { |
465 | inode_dio_wait(inode); | 465 | inode_dio_wait(inode); |
466 | } | 466 | } |
467 | extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); | ||
467 | 468 | ||
468 | /* nfs4proc.c */ | 469 | /* nfs4proc.c */ |
469 | extern void __nfs4_read_done_cb(struct nfs_read_data *); | 470 | extern void __nfs4_read_done_cb(struct nfs_read_data *); |
@@ -483,6 +484,12 @@ extern int _nfs4_call_sync_session(struct rpc_clnt *clnt, | |||
483 | struct nfs4_sequence_args *args, | 484 | struct nfs4_sequence_args *args, |
484 | struct nfs4_sequence_res *res, | 485 | struct nfs4_sequence_res *res, |
485 | int cache_reply); | 486 | int cache_reply); |
487 | extern int nfs40_walk_client_list(struct nfs_client *clp, | ||
488 | struct nfs_client **result, | ||
489 | struct rpc_cred *cred); | ||
490 | extern int nfs41_walk_client_list(struct nfs_client *clp, | ||
491 | struct nfs_client **result, | ||
492 | struct rpc_cred *cred); | ||
486 | 493 | ||
487 | /* | 494 | /* |
488 | * Determine the device name as a string | 495 | * Determine the device name as a string |
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 0539de1b8d1f..8ee1fab83268 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h | |||
@@ -5,6 +5,7 @@ | |||
5 | #ifndef __NFS_NETNS_H__ | 5 | #ifndef __NFS_NETNS_H__ |
6 | #define __NFS_NETNS_H__ | 6 | #define __NFS_NETNS_H__ |
7 | 7 | ||
8 | #include <linux/nfs4.h> | ||
8 | #include <net/net_namespace.h> | 9 | #include <net/net_namespace.h> |
9 | #include <net/netns/generic.h> | 10 | #include <net/netns/generic.h> |
10 | 11 | ||
@@ -22,6 +23,9 @@ struct nfs_net { | |||
22 | struct list_head nfs_volume_list; | 23 | struct list_head nfs_volume_list; |
23 | #if IS_ENABLED(CONFIG_NFS_V4) | 24 | #if IS_ENABLED(CONFIG_NFS_V4) |
24 | struct idr cb_ident_idr; /* Protected by nfs_client_lock */ | 25 | struct idr cb_ident_idr; /* Protected by nfs_client_lock */ |
26 | unsigned short nfs_callback_tcpport; | ||
27 | unsigned short nfs_callback_tcpport6; | ||
28 | int cb_users[NFS4_MAX_MINOR_VERSION + 1]; | ||
25 | #endif | 29 | #endif |
26 | spinlock_t nfs_client_lock; | 30 | spinlock_t nfs_client_lock; |
27 | struct timespec boot_time; | 31 | struct timespec boot_time; |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index da0618aeeadb..a525fdefccde 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -132,8 +132,8 @@ struct nfs4_lock_owner { | |||
132 | struct nfs4_lock_state { | 132 | struct nfs4_lock_state { |
133 | struct list_head ls_locks; /* Other lock stateids */ | 133 | struct list_head ls_locks; /* Other lock stateids */ |
134 | struct nfs4_state * ls_state; /* Pointer to open state */ | 134 | struct nfs4_state * ls_state; /* Pointer to open state */ |
135 | #define NFS_LOCK_INITIALIZED 1 | 135 | #define NFS_LOCK_INITIALIZED 0 |
136 | int ls_flags; | 136 | unsigned long ls_flags; |
137 | struct nfs_seqid_counter ls_seqid; | 137 | struct nfs_seqid_counter ls_seqid; |
138 | nfs4_stateid ls_stateid; | 138 | nfs4_stateid ls_stateid; |
139 | atomic_t ls_count; | 139 | atomic_t ls_count; |
@@ -191,6 +191,8 @@ struct nfs4_state_recovery_ops { | |||
191 | int (*establish_clid)(struct nfs_client *, struct rpc_cred *); | 191 | int (*establish_clid)(struct nfs_client *, struct rpc_cred *); |
192 | struct rpc_cred * (*get_clid_cred)(struct nfs_client *); | 192 | struct rpc_cred * (*get_clid_cred)(struct nfs_client *); |
193 | int (*reclaim_complete)(struct nfs_client *); | 193 | int (*reclaim_complete)(struct nfs_client *); |
194 | int (*detect_trunking)(struct nfs_client *, struct nfs_client **, | ||
195 | struct rpc_cred *); | ||
194 | }; | 196 | }; |
195 | 197 | ||
196 | struct nfs4_state_maintenance_ops { | 198 | struct nfs4_state_maintenance_ops { |
@@ -223,7 +225,7 @@ extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); | |||
223 | extern int nfs4_destroy_clientid(struct nfs_client *clp); | 225 | extern int nfs4_destroy_clientid(struct nfs_client *clp); |
224 | extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); | 226 | extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); |
225 | extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); | 227 | extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); |
226 | extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); | 228 | extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait); |
227 | extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); | 229 | extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); |
228 | extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *, | 230 | extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *, |
229 | struct nfs4_fs_locations *, struct page *); | 231 | struct nfs4_fs_locations *, struct page *); |
@@ -320,9 +322,15 @@ extern void nfs4_renew_state(struct work_struct *); | |||
320 | /* nfs4state.c */ | 322 | /* nfs4state.c */ |
321 | struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp); | 323 | struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp); |
322 | struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); | 324 | struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); |
325 | int nfs4_discover_server_trunking(struct nfs_client *clp, | ||
326 | struct nfs_client **); | ||
327 | int nfs40_discover_server_trunking(struct nfs_client *clp, | ||
328 | struct nfs_client **, struct rpc_cred *); | ||
323 | #if defined(CONFIG_NFS_V4_1) | 329 | #if defined(CONFIG_NFS_V4_1) |
324 | struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); | 330 | struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); |
325 | struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); | 331 | struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); |
332 | int nfs41_discover_server_trunking(struct nfs_client *clp, | ||
333 | struct nfs_client **, struct rpc_cred *); | ||
326 | extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); | 334 | extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); |
327 | #else | 335 | #else |
328 | static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) | 336 | static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) |
@@ -351,7 +359,7 @@ extern void nfs41_handle_server_scope(struct nfs_client *, | |||
351 | extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); | 359 | extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); |
352 | extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); | 360 | extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); |
353 | extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, | 361 | extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, |
354 | fmode_t, fl_owner_t, pid_t); | 362 | fmode_t, const struct nfs_lockowner *); |
355 | 363 | ||
356 | extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); | 364 | extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); |
357 | extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); | 365 | extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); |
@@ -372,6 +380,9 @@ extern bool nfs4_disable_idmapping; | |||
372 | extern unsigned short max_session_slots; | 380 | extern unsigned short max_session_slots; |
373 | extern unsigned short send_implementation_id; | 381 | extern unsigned short send_implementation_id; |
374 | 382 | ||
383 | #define NFS4_CLIENT_ID_UNIQ_LEN (64) | ||
384 | extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN]; | ||
385 | |||
375 | /* nfs4sysctl.c */ | 386 | /* nfs4sysctl.c */ |
376 | #ifdef CONFIG_SYSCTL | 387 | #ifdef CONFIG_SYSCTL |
377 | int nfs4_register_sysctl(void); | 388 | int nfs4_register_sysctl(void); |
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 24eb663f8ed5..6bacfde1319a 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c | |||
@@ -84,7 +84,7 @@ error: | |||
84 | static void nfs4_destroy_callback(struct nfs_client *clp) | 84 | static void nfs4_destroy_callback(struct nfs_client *clp) |
85 | { | 85 | { |
86 | if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) | 86 | if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) |
87 | nfs_callback_down(clp->cl_mvops->minor_version); | 87 | nfs_callback_down(clp->cl_mvops->minor_version, clp->cl_net); |
88 | } | 88 | } |
89 | 89 | ||
90 | static void nfs4_shutdown_client(struct nfs_client *clp) | 90 | static void nfs4_shutdown_client(struct nfs_client *clp) |
@@ -185,6 +185,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, | |||
185 | rpc_authflavor_t authflavour) | 185 | rpc_authflavor_t authflavour) |
186 | { | 186 | { |
187 | char buf[INET6_ADDRSTRLEN + 1]; | 187 | char buf[INET6_ADDRSTRLEN + 1]; |
188 | struct nfs_client *old; | ||
188 | int error; | 189 | int error; |
189 | 190 | ||
190 | if (clp->cl_cons_state == NFS_CS_READY) { | 191 | if (clp->cl_cons_state == NFS_CS_READY) { |
@@ -230,6 +231,17 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, | |||
230 | 231 | ||
231 | if (!nfs4_has_session(clp)) | 232 | if (!nfs4_has_session(clp)) |
232 | nfs_mark_client_ready(clp, NFS_CS_READY); | 233 | nfs_mark_client_ready(clp, NFS_CS_READY); |
234 | |||
235 | error = nfs4_discover_server_trunking(clp, &old); | ||
236 | if (error < 0) | ||
237 | goto error; | ||
238 | if (clp != old) { | ||
239 | clp->cl_preserve_clid = true; | ||
240 | nfs_put_client(clp); | ||
241 | clp = old; | ||
242 | atomic_inc(&clp->cl_count); | ||
243 | } | ||
244 | |||
233 | return clp; | 245 | return clp; |
234 | 246 | ||
235 | error: | 247 | error: |
@@ -239,6 +251,248 @@ error: | |||
239 | return ERR_PTR(error); | 251 | return ERR_PTR(error); |
240 | } | 252 | } |
241 | 253 | ||
254 | /* | ||
255 | * SETCLIENTID just did a callback update with the callback ident in | ||
256 | * "drop," but server trunking discovery claims "drop" and "keep" are | ||
257 | * actually the same server. Swap the callback IDs so that "keep" | ||
258 | * will continue to use the callback ident the server now knows about, | ||
259 | * and so that "keep"'s original callback ident is destroyed when | ||
260 | * "drop" is freed. | ||
261 | */ | ||
262 | static void nfs4_swap_callback_idents(struct nfs_client *keep, | ||
263 | struct nfs_client *drop) | ||
264 | { | ||
265 | struct nfs_net *nn = net_generic(keep->cl_net, nfs_net_id); | ||
266 | unsigned int save = keep->cl_cb_ident; | ||
267 | |||
268 | if (keep->cl_cb_ident == drop->cl_cb_ident) | ||
269 | return; | ||
270 | |||
271 | dprintk("%s: keeping callback ident %u and dropping ident %u\n", | ||
272 | __func__, keep->cl_cb_ident, drop->cl_cb_ident); | ||
273 | |||
274 | spin_lock(&nn->nfs_client_lock); | ||
275 | |||
276 | idr_replace(&nn->cb_ident_idr, keep, drop->cl_cb_ident); | ||
277 | keep->cl_cb_ident = drop->cl_cb_ident; | ||
278 | |||
279 | idr_replace(&nn->cb_ident_idr, drop, save); | ||
280 | drop->cl_cb_ident = save; | ||
281 | |||
282 | spin_unlock(&nn->nfs_client_lock); | ||
283 | } | ||
284 | |||
285 | /** | ||
286 | * nfs40_walk_client_list - Find server that recognizes a client ID | ||
287 | * | ||
288 | * @new: nfs_client with client ID to test | ||
289 | * @result: OUT: found nfs_client, or new | ||
290 | * @cred: credential to use for trunking test | ||
291 | * | ||
292 | * Returns zero, a negative errno, or a negative NFS4ERR status. | ||
293 | * If zero is returned, an nfs_client pointer is planted in "result." | ||
294 | * | ||
295 | * NB: nfs40_walk_client_list() relies on the new nfs_client being | ||
296 | * the last nfs_client on the list. | ||
297 | */ | ||
298 | int nfs40_walk_client_list(struct nfs_client *new, | ||
299 | struct nfs_client **result, | ||
300 | struct rpc_cred *cred) | ||
301 | { | ||
302 | struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); | ||
303 | struct nfs_client *pos, *n, *prev = NULL; | ||
304 | struct nfs4_setclientid_res clid = { | ||
305 | .clientid = new->cl_clientid, | ||
306 | .confirm = new->cl_confirm, | ||
307 | }; | ||
308 | int status; | ||
309 | |||
310 | spin_lock(&nn->nfs_client_lock); | ||
311 | list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { | ||
312 | /* If "pos" isn't marked ready, we can't trust the | ||
313 | * remaining fields in "pos" */ | ||
314 | if (pos->cl_cons_state < NFS_CS_READY) | ||
315 | continue; | ||
316 | |||
317 | if (pos->rpc_ops != new->rpc_ops) | ||
318 | continue; | ||
319 | |||
320 | if (pos->cl_proto != new->cl_proto) | ||
321 | continue; | ||
322 | |||
323 | if (pos->cl_minorversion != new->cl_minorversion) | ||
324 | continue; | ||
325 | |||
326 | if (pos->cl_clientid != new->cl_clientid) | ||
327 | continue; | ||
328 | |||
329 | atomic_inc(&pos->cl_count); | ||
330 | spin_unlock(&nn->nfs_client_lock); | ||
331 | |||
332 | if (prev) | ||
333 | nfs_put_client(prev); | ||
334 | |||
335 | status = nfs4_proc_setclientid_confirm(pos, &clid, cred); | ||
336 | if (status == 0) { | ||
337 | nfs4_swap_callback_idents(pos, new); | ||
338 | |||
339 | nfs_put_client(pos); | ||
340 | *result = pos; | ||
341 | dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", | ||
342 | __func__, pos, atomic_read(&pos->cl_count)); | ||
343 | return 0; | ||
344 | } | ||
345 | if (status != -NFS4ERR_STALE_CLIENTID) { | ||
346 | nfs_put_client(pos); | ||
347 | dprintk("NFS: <-- %s status = %d, no result\n", | ||
348 | __func__, status); | ||
349 | return status; | ||
350 | } | ||
351 | |||
352 | spin_lock(&nn->nfs_client_lock); | ||
353 | prev = pos; | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * No matching nfs_client found. This should be impossible, | ||
358 | * because the new nfs_client has already been added to | ||
359 | * nfs_client_list by nfs_get_client(). | ||
360 | * | ||
361 | * Don't BUG(), since the caller is holding a mutex. | ||
362 | */ | ||
363 | if (prev) | ||
364 | nfs_put_client(prev); | ||
365 | spin_unlock(&nn->nfs_client_lock); | ||
366 | pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); | ||
367 | return -NFS4ERR_STALE_CLIENTID; | ||
368 | } | ||
369 | |||
370 | #ifdef CONFIG_NFS_V4_1 | ||
371 | /* | ||
372 | * Returns true if the client IDs match | ||
373 | */ | ||
374 | static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b) | ||
375 | { | ||
376 | if (a->cl_clientid != b->cl_clientid) { | ||
377 | dprintk("NFS: --> %s client ID %llx does not match %llx\n", | ||
378 | __func__, a->cl_clientid, b->cl_clientid); | ||
379 | return false; | ||
380 | } | ||
381 | dprintk("NFS: --> %s client ID %llx matches %llx\n", | ||
382 | __func__, a->cl_clientid, b->cl_clientid); | ||
383 | return true; | ||
384 | } | ||
385 | |||
386 | /* | ||
387 | * Returns true if the server owners match | ||
388 | */ | ||
389 | static bool | ||
390 | nfs4_match_serverowners(struct nfs_client *a, struct nfs_client *b) | ||
391 | { | ||
392 | struct nfs41_server_owner *o1 = a->cl_serverowner; | ||
393 | struct nfs41_server_owner *o2 = b->cl_serverowner; | ||
394 | |||
395 | if (o1->minor_id != o2->minor_id) { | ||
396 | dprintk("NFS: --> %s server owner minor IDs do not match\n", | ||
397 | __func__); | ||
398 | return false; | ||
399 | } | ||
400 | |||
401 | if (o1->major_id_sz != o2->major_id_sz) | ||
402 | goto out_major_mismatch; | ||
403 | if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0) | ||
404 | goto out_major_mismatch; | ||
405 | |||
406 | dprintk("NFS: --> %s server owners match\n", __func__); | ||
407 | return true; | ||
408 | |||
409 | out_major_mismatch: | ||
410 | dprintk("NFS: --> %s server owner major IDs do not match\n", | ||
411 | __func__); | ||
412 | return false; | ||
413 | } | ||
414 | |||
415 | /** | ||
416 | * nfs41_walk_client_list - Find nfs_client that matches a client/server owner | ||
417 | * | ||
418 | * @new: nfs_client with client ID to test | ||
419 | * @result: OUT: found nfs_client, or new | ||
420 | * @cred: credential to use for trunking test | ||
421 | * | ||
422 | * Returns zero, a negative errno, or a negative NFS4ERR status. | ||
423 | * If zero is returned, an nfs_client pointer is planted in "result." | ||
424 | * | ||
425 | * NB: nfs41_walk_client_list() relies on the new nfs_client being | ||
426 | * the last nfs_client on the list. | ||
427 | */ | ||
428 | int nfs41_walk_client_list(struct nfs_client *new, | ||
429 | struct nfs_client **result, | ||
430 | struct rpc_cred *cred) | ||
431 | { | ||
432 | struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); | ||
433 | struct nfs_client *pos, *n, *prev = NULL; | ||
434 | int error; | ||
435 | |||
436 | spin_lock(&nn->nfs_client_lock); | ||
437 | list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { | ||
438 | /* If "pos" isn't marked ready, we can't trust the | ||
439 | * remaining fields in "pos", especially the client | ||
440 | * ID and serverowner fields. Wait for CREATE_SESSION | ||
441 | * to finish. */ | ||
442 | if (pos->cl_cons_state < NFS_CS_READY) { | ||
443 | atomic_inc(&pos->cl_count); | ||
444 | spin_unlock(&nn->nfs_client_lock); | ||
445 | |||
446 | if (prev) | ||
447 | nfs_put_client(prev); | ||
448 | prev = pos; | ||
449 | |||
450 | error = nfs_wait_client_init_complete(pos); | ||
451 | if (error < 0) { | ||
452 | nfs_put_client(pos); | ||
453 | spin_lock(&nn->nfs_client_lock); | ||
454 | continue; | ||
455 | } | ||
456 | |||
457 | spin_lock(&nn->nfs_client_lock); | ||
458 | } | ||
459 | |||
460 | if (pos->rpc_ops != new->rpc_ops) | ||
461 | continue; | ||
462 | |||
463 | if (pos->cl_proto != new->cl_proto) | ||
464 | continue; | ||
465 | |||
466 | if (pos->cl_minorversion != new->cl_minorversion) | ||
467 | continue; | ||
468 | |||
469 | if (!nfs4_match_clientids(pos, new)) | ||
470 | continue; | ||
471 | |||
472 | if (!nfs4_match_serverowners(pos, new)) | ||
473 | continue; | ||
474 | |||
475 | spin_unlock(&nn->nfs_client_lock); | ||
476 | dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", | ||
477 | __func__, pos, atomic_read(&pos->cl_count)); | ||
478 | |||
479 | *result = pos; | ||
480 | return 0; | ||
481 | } | ||
482 | |||
483 | /* | ||
484 | * No matching nfs_client found. This should be impossible, | ||
485 | * because the new nfs_client has already been added to | ||
486 | * nfs_client_list by nfs_get_client(). | ||
487 | * | ||
488 | * Don't BUG(), since the caller is holding a mutex. | ||
489 | */ | ||
490 | spin_unlock(&nn->nfs_client_lock); | ||
491 | pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); | ||
492 | return -NFS4ERR_STALE_CLIENTID; | ||
493 | } | ||
494 | #endif /* CONFIG_NFS_V4_1 */ | ||
495 | |||
242 | static void nfs4_destroy_server(struct nfs_server *server) | 496 | static void nfs4_destroy_server(struct nfs_server *server) |
243 | { | 497 | { |
244 | nfs_server_return_all_delegations(server); | 498 | nfs_server_return_all_delegations(server); |
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index eb5eb8eef4d3..afddd6639afb 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c | |||
@@ -95,16 +95,25 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
95 | int ret; | 95 | int ret; |
96 | struct inode *inode = file->f_path.dentry->d_inode; | 96 | struct inode *inode = file->f_path.dentry->d_inode; |
97 | 97 | ||
98 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 98 | do { |
99 | if (ret != 0) | 99 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
100 | goto out; | 100 | if (ret != 0) |
101 | mutex_lock(&inode->i_mutex); | 101 | break; |
102 | ret = nfs_file_fsync_commit(file, start, end, datasync); | 102 | mutex_lock(&inode->i_mutex); |
103 | if (!ret && !datasync) | 103 | ret = nfs_file_fsync_commit(file, start, end, datasync); |
104 | /* application has asked for meta-data sync */ | 104 | if (!ret && !datasync) |
105 | ret = pnfs_layoutcommit_inode(inode, true); | 105 | /* application has asked for meta-data sync */ |
106 | mutex_unlock(&inode->i_mutex); | 106 | ret = pnfs_layoutcommit_inode(inode, true); |
107 | out: | 107 | mutex_unlock(&inode->i_mutex); |
108 | /* | ||
109 | * If nfs_file_fsync_commit detected a server reboot, then | ||
110 | * resend all dirty pages that might have been covered by | ||
111 | * the NFS_CONTEXT_RESEND_WRITES flag | ||
112 | */ | ||
113 | start = 0; | ||
114 | end = LLONG_MAX; | ||
115 | } while (ret == -EAGAIN); | ||
116 | |||
108 | return ret; | 117 | return ret; |
109 | } | 118 | } |
110 | 119 | ||
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 53f94d915bd1..52d847212066 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
@@ -190,8 +190,6 @@ static int filelayout_async_handle_error(struct rpc_task *task, | |||
190 | * i/o and all i/o waiting on the slot table to the MDS until | 190 | * i/o and all i/o waiting on the slot table to the MDS until |
191 | * layout is destroyed and a new valid layout is obtained. | 191 | * layout is destroyed and a new valid layout is obtained. |
192 | */ | 192 | */ |
193 | set_bit(NFS_LAYOUT_INVALID, | ||
194 | &NFS_I(inode)->layout->plh_flags); | ||
195 | pnfs_destroy_layout(NFS_I(inode)); | 193 | pnfs_destroy_layout(NFS_I(inode)); |
196 | rpc_wake_up(&tbl->slot_tbl_waitq); | 194 | rpc_wake_up(&tbl->slot_tbl_waitq); |
197 | goto reset; | 195 | goto reset; |
@@ -205,7 +203,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, | |||
205 | case -EPIPE: | 203 | case -EPIPE: |
206 | dprintk("%s DS connection error %d\n", __func__, | 204 | dprintk("%s DS connection error %d\n", __func__, |
207 | task->tk_status); | 205 | task->tk_status); |
208 | filelayout_mark_devid_invalid(devid); | 206 | nfs4_mark_deviceid_unavailable(devid); |
209 | clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); | 207 | clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); |
210 | _pnfs_return_layout(inode); | 208 | _pnfs_return_layout(inode); |
211 | rpc_wake_up(&tbl->slot_tbl_waitq); | 209 | rpc_wake_up(&tbl->slot_tbl_waitq); |
@@ -269,6 +267,21 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata) | |||
269 | (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); | 267 | (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); |
270 | } | 268 | } |
271 | 269 | ||
/*
 * Report whether a device ID node must not be used: true when the node
 * is flagged invalid by the file layout driver, or when
 * nfs4_test_deviceid_unavailable() reports it as unavailable.
 */
bool
filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node)
{
	if (filelayout_test_devid_invalid(node))
		return true;

	return nfs4_test_deviceid_unavailable(node);
}
276 | |||
/*
 * Returns true when the device ID node behind this layout segment is
 * invalid or unavailable, as judged by
 * filelayout_test_devid_unavailable().
 */
static bool
filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
{
	return filelayout_test_devid_unavailable(FILELAYOUT_DEVID_NODE(lseg));
}
284 | |||
272 | /* | 285 | /* |
273 | * Call ops for the async read/write cases | 286 | * Call ops for the async read/write cases |
274 | * In the case of dense layouts, the offset needs to be reset to its | 287 | * In the case of dense layouts, the offset needs to be reset to its |
@@ -453,7 +466,7 @@ static void filelayout_commit_release(void *calldata) | |||
453 | struct nfs_commit_data *data = calldata; | 466 | struct nfs_commit_data *data = calldata; |
454 | 467 | ||
455 | data->completion_ops->completion(data); | 468 | data->completion_ops->completion(data); |
456 | put_lseg(data->lseg); | 469 | pnfs_put_lseg(data->lseg); |
457 | nfs_put_client(data->ds_clp); | 470 | nfs_put_client(data->ds_clp); |
458 | nfs_commitdata_release(data); | 471 | nfs_commitdata_release(data); |
459 | } | 472 | } |
@@ -608,13 +621,13 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
608 | d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, | 621 | d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, |
609 | NFS_SERVER(lo->plh_inode)->nfs_client, id); | 622 | NFS_SERVER(lo->plh_inode)->nfs_client, id); |
610 | if (d == NULL) { | 623 | if (d == NULL) { |
611 | dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); | 624 | dsaddr = filelayout_get_device_info(lo->plh_inode, id, gfp_flags); |
612 | if (dsaddr == NULL) | 625 | if (dsaddr == NULL) |
613 | goto out; | 626 | goto out; |
614 | } else | 627 | } else |
615 | dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); | 628 | dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); |
616 | /* Found deviceid is being reaped */ | 629 | /* Found deviceid is unavailable */ |
617 | if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags)) | 630 | if (filelayout_test_devid_unavailable(&dsaddr->id_node)) |
618 | goto out_put; | 631 | goto out_put; |
619 | 632 | ||
620 | fl->dsaddr = dsaddr; | 633 | fl->dsaddr = dsaddr; |
@@ -931,7 +944,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, | |||
931 | nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); | 944 | nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); |
932 | status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); | 945 | status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); |
933 | if (status < 0) { | 946 | if (status < 0) { |
934 | put_lseg(pgio->pg_lseg); | 947 | pnfs_put_lseg(pgio->pg_lseg); |
935 | pgio->pg_lseg = NULL; | 948 | pgio->pg_lseg = NULL; |
936 | goto out_mds; | 949 | goto out_mds; |
937 | } | 950 | } |
@@ -985,7 +998,7 @@ filelayout_clear_request_commit(struct nfs_page *req, | |||
985 | out: | 998 | out: |
986 | nfs_request_remove_commit_list(req, cinfo); | 999 | nfs_request_remove_commit_list(req, cinfo); |
987 | spin_unlock(cinfo->lock); | 1000 | spin_unlock(cinfo->lock); |
988 | put_lseg(freeme); | 1001 | pnfs_put_lseg(freeme); |
989 | } | 1002 | } |
990 | 1003 | ||
991 | static struct list_head * | 1004 | static struct list_head * |
@@ -1018,7 +1031,7 @@ filelayout_choose_commit_list(struct nfs_page *req, | |||
1018 | * off due to a rewrite, in which case it will be done in | 1031 | * off due to a rewrite, in which case it will be done in |
1019 | * filelayout_clear_request_commit | 1032 | * filelayout_clear_request_commit |
1020 | */ | 1033 | */ |
1021 | buckets[i].wlseg = get_lseg(lseg); | 1034 | buckets[i].wlseg = pnfs_get_lseg(lseg); |
1022 | } | 1035 | } |
1023 | set_bit(PG_COMMIT_TO_DS, &req->wb_flags); | 1036 | set_bit(PG_COMMIT_TO_DS, &req->wb_flags); |
1024 | cinfo->ds->nwritten++; | 1037 | cinfo->ds->nwritten++; |
@@ -1128,7 +1141,7 @@ filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, | |||
1128 | if (list_empty(src)) | 1141 | if (list_empty(src)) |
1129 | bucket->wlseg = NULL; | 1142 | bucket->wlseg = NULL; |
1130 | else | 1143 | else |
1131 | get_lseg(bucket->clseg); | 1144 | pnfs_get_lseg(bucket->clseg); |
1132 | } | 1145 | } |
1133 | return ret; | 1146 | return ret; |
1134 | } | 1147 | } |
@@ -1159,12 +1172,12 @@ static void filelayout_recover_commit_reqs(struct list_head *dst, | |||
1159 | 1172 | ||
1160 | /* NOTE cinfo->lock is NOT held, relying on fact that this is | 1173 | /* NOTE cinfo->lock is NOT held, relying on fact that this is |
1161 | * only called on single thread per dreq. | 1174 | * only called on single thread per dreq. |
1162 | * Can't take the lock because need to do put_lseg | 1175 | * Can't take the lock because need to do pnfs_put_lseg |
1163 | */ | 1176 | */ |
1164 | for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { | 1177 | for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { |
1165 | if (transfer_commit_list(&b->written, dst, cinfo, 0)) { | 1178 | if (transfer_commit_list(&b->written, dst, cinfo, 0)) { |
1166 | BUG_ON(!list_empty(&b->written)); | 1179 | BUG_ON(!list_empty(&b->written)); |
1167 | put_lseg(b->wlseg); | 1180 | pnfs_put_lseg(b->wlseg); |
1168 | b->wlseg = NULL; | 1181 | b->wlseg = NULL; |
1169 | } | 1182 | } |
1170 | } | 1183 | } |
@@ -1200,7 +1213,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) | |||
1200 | if (list_empty(&bucket->committing)) | 1213 | if (list_empty(&bucket->committing)) |
1201 | continue; | 1214 | continue; |
1202 | nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); | 1215 | nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); |
1203 | put_lseg(bucket->clseg); | 1216 | pnfs_put_lseg(bucket->clseg); |
1204 | bucket->clseg = NULL; | 1217 | bucket->clseg = NULL; |
1205 | } | 1218 | } |
1206 | /* Caller will clean up entries put on list */ | 1219 | /* Caller will clean up entries put on list */ |
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 43fe802dd678..dca47d786710 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h | |||
@@ -129,23 +129,13 @@ filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node) | |||
129 | } | 129 | } |
130 | 130 | ||
131 | static inline bool | 131 | static inline bool |
132 | filelayout_test_layout_invalid(struct pnfs_layout_hdr *lo) | ||
133 | { | ||
134 | return test_bit(NFS_LAYOUT_INVALID, &lo->plh_flags); | ||
135 | } | ||
136 | |||
137 | static inline bool | ||
138 | filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) | 132 | filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) |
139 | { | 133 | { |
140 | return test_bit(NFS_DEVICEID_INVALID, &node->flags); | 134 | return test_bit(NFS_DEVICEID_INVALID, &node->flags); |
141 | } | 135 | } |
142 | 136 | ||
143 | static inline bool | 137 | extern bool |
144 | filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) | 138 | filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node); |
145 | { | ||
146 | return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) || | ||
147 | filelayout_test_layout_invalid(lseg->pls_layout); | ||
148 | } | ||
149 | 139 | ||
150 | extern struct nfs_fh * | 140 | extern struct nfs_fh * |
151 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); | 141 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); |
@@ -158,7 +148,7 @@ struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, | |||
158 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | 148 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); |
159 | extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | 149 | extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); |
160 | struct nfs4_file_layout_dsaddr * | 150 | struct nfs4_file_layout_dsaddr * |
161 | get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); | 151 | filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); |
162 | void nfs4_ds_disconnect(struct nfs_client *clp); | 152 | void nfs4_ds_disconnect(struct nfs_client *clp); |
163 | 153 | ||
164 | #endif /* FS_NFS_NFS4FILELAYOUT_H */ | 154 | #endif /* FS_NFS_NFS4FILELAYOUT_H */ |
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index f81231f30d94..3336d5eaf879 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c | |||
@@ -690,7 +690,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl | |||
690 | * of available devices, and return it. | 690 | * of available devices, and return it. |
691 | */ | 691 | */ |
692 | struct nfs4_file_layout_dsaddr * | 692 | struct nfs4_file_layout_dsaddr * |
693 | get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) | 693 | filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) |
694 | { | 694 | { |
695 | struct pnfs_device *pdev = NULL; | 695 | struct pnfs_device *pdev = NULL; |
696 | u32 max_resp_sz; | 696 | u32 max_resp_sz; |
@@ -804,13 +804,14 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) | |||
804 | struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; | 804 | struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; |
805 | struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); | 805 | struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); |
806 | 806 | ||
807 | if (filelayout_test_devid_invalid(devid)) | 807 | if (filelayout_test_devid_unavailable(devid)) |
808 | return NULL; | 808 | return NULL; |
809 | 809 | ||
810 | if (ds == NULL) { | 810 | if (ds == NULL) { |
811 | printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", | 811 | printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", |
812 | __func__, ds_idx); | 812 | __func__, ds_idx); |
813 | goto mark_dev_invalid; | 813 | filelayout_mark_devid_invalid(devid); |
814 | return NULL; | ||
814 | } | 815 | } |
815 | 816 | ||
816 | if (!ds->ds_clp) { | 817 | if (!ds->ds_clp) { |
@@ -818,14 +819,12 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) | |||
818 | int err; | 819 | int err; |
819 | 820 | ||
820 | err = nfs4_ds_connect(s, ds); | 821 | err = nfs4_ds_connect(s, ds); |
821 | if (err) | 822 | if (err) { |
822 | goto mark_dev_invalid; | 823 | nfs4_mark_deviceid_unavailable(devid); |
824 | return NULL; | ||
825 | } | ||
823 | } | 826 | } |
824 | return ds; | 827 | return ds; |
825 | |||
826 | mark_dev_invalid: | ||
827 | filelayout_mark_devid_invalid(devid); | ||
828 | return NULL; | ||
829 | } | 828 | } |
830 | 829 | ||
831 | module_param(dataserver_retrans, uint, 0644); | 830 | module_param(dataserver_retrans, uint, 0644); |
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 017b4b01a69c..79fbb61ce202 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c | |||
@@ -192,25 +192,13 @@ out: | |||
192 | struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode, | 192 | struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode, |
193 | struct qstr *name) | 193 | struct qstr *name) |
194 | { | 194 | { |
195 | struct rpc_clnt *clone; | ||
196 | struct rpc_auth *auth; | ||
197 | rpc_authflavor_t flavor; | 195 | rpc_authflavor_t flavor; |
198 | 196 | ||
199 | flavor = nfs4_negotiate_security(inode, name); | 197 | flavor = nfs4_negotiate_security(inode, name); |
200 | if ((int)flavor < 0) | 198 | if ((int)flavor < 0) |
201 | return ERR_PTR(flavor); | 199 | return ERR_PTR((int)flavor); |
202 | 200 | ||
203 | clone = rpc_clone_client(clnt); | 201 | return rpc_clone_client_set_auth(clnt, flavor); |
204 | if (IS_ERR(clone)) | ||
205 | return clone; | ||
206 | |||
207 | auth = rpcauth_create(flavor, clone); | ||
208 | if (!auth) { | ||
209 | rpc_shutdown_client(clone); | ||
210 | clone = ERR_PTR(-EIO); | ||
211 | } | ||
212 | |||
213 | return clone; | ||
214 | } | 202 | } |
215 | 203 | ||
216 | static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, | 204 | static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1e50326d00dd..68b21d81b7ac 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -104,6 +104,8 @@ static int nfs4_map_errors(int err) | |||
104 | return -EACCES; | 104 | return -EACCES; |
105 | case -NFS4ERR_MINOR_VERS_MISMATCH: | 105 | case -NFS4ERR_MINOR_VERS_MISMATCH: |
106 | return -EPROTONOSUPPORT; | 106 | return -EPROTONOSUPPORT; |
107 | case -NFS4ERR_ACCESS: | ||
108 | return -EACCES; | ||
107 | default: | 109 | default: |
108 | dprintk("%s could not handle NFSv4 error %d\n", | 110 | dprintk("%s could not handle NFSv4 error %d\n", |
109 | __func__, -err); | 111 | __func__, -err); |
@@ -150,6 +152,12 @@ static const u32 nfs4_pnfs_open_bitmap[3] = { | |||
150 | FATTR4_WORD2_MDSTHRESHOLD | 152 | FATTR4_WORD2_MDSTHRESHOLD |
151 | }; | 153 | }; |
152 | 154 | ||
155 | static const u32 nfs4_open_noattr_bitmap[3] = { | ||
156 | FATTR4_WORD0_TYPE | ||
157 | | FATTR4_WORD0_CHANGE | ||
158 | | FATTR4_WORD0_FILEID, | ||
159 | }; | ||
160 | |||
153 | const u32 nfs4_statfs_bitmap[2] = { | 161 | const u32 nfs4_statfs_bitmap[2] = { |
154 | FATTR4_WORD0_FILES_AVAIL | 162 | FATTR4_WORD0_FILES_AVAIL |
155 | | FATTR4_WORD0_FILES_FREE | 163 | | FATTR4_WORD0_FILES_FREE |
@@ -832,6 +840,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) | |||
832 | p->o_res.seqid = p->o_arg.seqid; | 840 | p->o_res.seqid = p->o_arg.seqid; |
833 | p->c_res.seqid = p->c_arg.seqid; | 841 | p->c_res.seqid = p->c_arg.seqid; |
834 | p->o_res.server = p->o_arg.server; | 842 | p->o_res.server = p->o_arg.server; |
843 | p->o_res.access_request = p->o_arg.access; | ||
835 | nfs_fattr_init(&p->f_attr); | 844 | nfs_fattr_init(&p->f_attr); |
836 | nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name); | 845 | nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name); |
837 | } | 846 | } |
@@ -860,6 +869,14 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | |||
860 | p->o_arg.fh = NFS_FH(dir); | 869 | p->o_arg.fh = NFS_FH(dir); |
861 | p->o_arg.open_flags = flags; | 870 | p->o_arg.open_flags = flags; |
862 | p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); | 871 | p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); |
872 | /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS | ||
873 | * will return permission denied for all bits until close */ | ||
874 | if (!(flags & O_EXCL)) { | ||
875 | /* ask server to check for all possible rights as results | ||
876 | * are cached */ | ||
877 | p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | | ||
878 | NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE; | ||
879 | } | ||
863 | p->o_arg.clientid = server->nfs_client->cl_clientid; | 880 | p->o_arg.clientid = server->nfs_client->cl_clientid; |
864 | p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); | 881 | p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); |
865 | p->o_arg.id.uniquifier = sp->so_seqid.owner_id; | 882 | p->o_arg.id.uniquifier = sp->so_seqid.owner_id; |
@@ -1115,11 +1132,80 @@ out_return_state: | |||
1115 | return state; | 1132 | return state; |
1116 | } | 1133 | } |
1117 | 1134 | ||
1118 | static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) | 1135 | static void |
1136 | nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state) | ||
1137 | { | ||
1138 | struct nfs_client *clp = NFS_SERVER(state->inode)->nfs_client; | ||
1139 | struct nfs_delegation *delegation; | ||
1140 | int delegation_flags = 0; | ||
1141 | |||
1142 | rcu_read_lock(); | ||
1143 | delegation = rcu_dereference(NFS_I(state->inode)->delegation); | ||
1144 | if (delegation) | ||
1145 | delegation_flags = delegation->flags; | ||
1146 | rcu_read_unlock(); | ||
1147 | if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) { | ||
1148 | pr_err_ratelimited("NFS: Broken NFSv4 server %s is " | ||
1149 | "returning a delegation for " | ||
1150 | "OPEN(CLAIM_DELEGATE_CUR)\n", | ||
1151 | clp->cl_hostname); | ||
1152 | } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) | ||
1153 | nfs_inode_set_delegation(state->inode, | ||
1154 | data->owner->so_cred, | ||
1155 | &data->o_res); | ||
1156 | else | ||
1157 | nfs_inode_reclaim_delegation(state->inode, | ||
1158 | data->owner->so_cred, | ||
1159 | &data->o_res); | ||
1160 | } | ||
1161 | |||
1162 | /* | ||
1163 | * Check the inode attributes against the CLAIM_PREVIOUS returned attributes | ||
1164 | * and update the nfs4_state. | ||
1165 | */ | ||
1166 | static struct nfs4_state * | ||
1167 | _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) | ||
1168 | { | ||
1169 | struct inode *inode = data->state->inode; | ||
1170 | struct nfs4_state *state = data->state; | ||
1171 | int ret; | ||
1172 | |||
1173 | if (!data->rpc_done) { | ||
1174 | ret = data->rpc_status; | ||
1175 | goto err; | ||
1176 | } | ||
1177 | |||
1178 | ret = -ESTALE; | ||
1179 | if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) || | ||
1180 | !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) || | ||
1181 | !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE)) | ||
1182 | goto err; | ||
1183 | |||
1184 | ret = -ENOMEM; | ||
1185 | state = nfs4_get_open_state(inode, data->owner); | ||
1186 | if (state == NULL) | ||
1187 | goto err; | ||
1188 | |||
1189 | ret = nfs_refresh_inode(inode, &data->f_attr); | ||
1190 | if (ret) | ||
1191 | goto err; | ||
1192 | |||
1193 | if (data->o_res.delegation_type != 0) | ||
1194 | nfs4_opendata_check_deleg(data, state); | ||
1195 | update_open_stateid(state, &data->o_res.stateid, NULL, | ||
1196 | data->o_arg.fmode); | ||
1197 | |||
1198 | return state; | ||
1199 | err: | ||
1200 | return ERR_PTR(ret); | ||
1201 | |||
1202 | } | ||
1203 | |||
1204 | static struct nfs4_state * | ||
1205 | _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) | ||
1119 | { | 1206 | { |
1120 | struct inode *inode; | 1207 | struct inode *inode; |
1121 | struct nfs4_state *state = NULL; | 1208 | struct nfs4_state *state = NULL; |
1122 | struct nfs_delegation *delegation; | ||
1123 | int ret; | 1209 | int ret; |
1124 | 1210 | ||
1125 | if (!data->rpc_done) { | 1211 | if (!data->rpc_done) { |
@@ -1138,30 +1224,8 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data | |||
1138 | state = nfs4_get_open_state(inode, data->owner); | 1224 | state = nfs4_get_open_state(inode, data->owner); |
1139 | if (state == NULL) | 1225 | if (state == NULL) |
1140 | goto err_put_inode; | 1226 | goto err_put_inode; |
1141 | if (data->o_res.delegation_type != 0) { | 1227 | if (data->o_res.delegation_type != 0) |
1142 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; | 1228 | nfs4_opendata_check_deleg(data, state); |
1143 | int delegation_flags = 0; | ||
1144 | |||
1145 | rcu_read_lock(); | ||
1146 | delegation = rcu_dereference(NFS_I(inode)->delegation); | ||
1147 | if (delegation) | ||
1148 | delegation_flags = delegation->flags; | ||
1149 | rcu_read_unlock(); | ||
1150 | if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) { | ||
1151 | pr_err_ratelimited("NFS: Broken NFSv4 server %s is " | ||
1152 | "returning a delegation for " | ||
1153 | "OPEN(CLAIM_DELEGATE_CUR)\n", | ||
1154 | clp->cl_hostname); | ||
1155 | } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) | ||
1156 | nfs_inode_set_delegation(state->inode, | ||
1157 | data->owner->so_cred, | ||
1158 | &data->o_res); | ||
1159 | else | ||
1160 | nfs_inode_reclaim_delegation(state->inode, | ||
1161 | data->owner->so_cred, | ||
1162 | &data->o_res); | ||
1163 | } | ||
1164 | |||
1165 | update_open_stateid(state, &data->o_res.stateid, NULL, | 1229 | update_open_stateid(state, &data->o_res.stateid, NULL, |
1166 | data->o_arg.fmode); | 1230 | data->o_arg.fmode); |
1167 | iput(inode); | 1231 | iput(inode); |
@@ -1173,6 +1237,14 @@ err: | |||
1173 | return ERR_PTR(ret); | 1237 | return ERR_PTR(ret); |
1174 | } | 1238 | } |
1175 | 1239 | ||
1240 | static struct nfs4_state * | ||
1241 | nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) | ||
1242 | { | ||
1243 | if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) | ||
1244 | return _nfs4_opendata_reclaim_to_nfs4_state(data); | ||
1245 | return _nfs4_opendata_to_nfs4_state(data); | ||
1246 | } | ||
1247 | |||
1176 | static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) | 1248 | static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) |
1177 | { | 1249 | { |
1178 | struct nfs_inode *nfsi = NFS_I(state->inode); | 1250 | struct nfs_inode *nfsi = NFS_I(state->inode); |
@@ -1494,6 +1566,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) | |||
1494 | data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; | 1566 | data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; |
1495 | if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { | 1567 | if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { |
1496 | task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; | 1568 | task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; |
1569 | data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0]; | ||
1497 | nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); | 1570 | nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); |
1498 | } | 1571 | } |
1499 | data->timestamp = jiffies; | 1572 | data->timestamp = jiffies; |
@@ -1526,7 +1599,8 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) | |||
1526 | return; | 1599 | return; |
1527 | 1600 | ||
1528 | if (task->tk_status == 0) { | 1601 | if (task->tk_status == 0) { |
1529 | switch (data->o_res.f_attr->mode & S_IFMT) { | 1602 | if (data->o_res.f_attr->valid & NFS_ATTR_FATTR_TYPE) { |
1603 | switch (data->o_res.f_attr->mode & S_IFMT) { | ||
1530 | case S_IFREG: | 1604 | case S_IFREG: |
1531 | break; | 1605 | break; |
1532 | case S_IFLNK: | 1606 | case S_IFLNK: |
@@ -1537,6 +1611,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) | |||
1537 | break; | 1611 | break; |
1538 | default: | 1612 | default: |
1539 | data->rpc_status = -ENOTDIR; | 1613 | data->rpc_status = -ENOTDIR; |
1614 | } | ||
1540 | } | 1615 | } |
1541 | renew_lease(data->o_res.server, data->timestamp); | 1616 | renew_lease(data->o_res.server, data->timestamp); |
1542 | if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)) | 1617 | if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)) |
@@ -1643,6 +1718,39 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) | |||
1643 | return status; | 1718 | return status; |
1644 | } | 1719 | } |
1645 | 1720 | ||
1721 | static int nfs4_opendata_access(struct rpc_cred *cred, | ||
1722 | struct nfs4_opendata *opendata, | ||
1723 | struct nfs4_state *state, fmode_t fmode) | ||
1724 | { | ||
1725 | struct nfs_access_entry cache; | ||
1726 | u32 mask; | ||
1727 | |||
1728 | /* access call failed or for some reason the server doesn't | ||
1729 | * support any access modes -- defer access call until later */ | ||
1730 | if (opendata->o_res.access_supported == 0) | ||
1731 | return 0; | ||
1732 | |||
1733 | mask = 0; | ||
1734 | /* don't check MAY_WRITE - a newly created file may not have | ||
1735 | * write mode bits, but POSIX allows the creating process to write */ | ||
1736 | if (fmode & FMODE_READ) | ||
1737 | mask |= MAY_READ; | ||
1738 | if (fmode & FMODE_EXEC) | ||
1739 | mask |= MAY_EXEC; | ||
1740 | |||
1741 | cache.cred = cred; | ||
1742 | cache.jiffies = jiffies; | ||
1743 | nfs_access_set_mask(&cache, opendata->o_res.access_result); | ||
1744 | nfs_access_add_cache(state->inode, &cache); | ||
1745 | |||
1746 | if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0) | ||
1747 | return 0; | ||
1748 | |||
1749 | /* even though OPEN succeeded, access is denied. Close the file */ | ||
1750 | nfs4_close_state(state, fmode); | ||
1751 | return -NFS4ERR_ACCESS; | ||
1752 | } | ||
1753 | |||
1646 | /* | 1754 | /* |
1647 | * Note: On error, nfs4_proc_open will free the struct nfs4_opendata | 1755 | * Note: On error, nfs4_proc_open will free the struct nfs4_opendata |
1648 | */ | 1756 | */ |
@@ -1774,7 +1882,11 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state) | |||
1774 | * informs us the stateid is unrecognized. */ | 1882 | * informs us the stateid is unrecognized. */ |
1775 | if (status != -NFS4ERR_BAD_STATEID) | 1883 | if (status != -NFS4ERR_BAD_STATEID) |
1776 | nfs41_free_stateid(server, stateid); | 1884 | nfs41_free_stateid(server, stateid); |
1885 | nfs_remove_bad_delegation(state->inode); | ||
1777 | 1886 | ||
1887 | write_seqlock(&state->seqlock); | ||
1888 | nfs4_stateid_copy(&state->stateid, &state->open_stateid); | ||
1889 | write_sequnlock(&state->seqlock); | ||
1778 | clear_bit(NFS_DELEGATED_STATE, &state->flags); | 1890 | clear_bit(NFS_DELEGATED_STATE, &state->flags); |
1779 | } | 1891 | } |
1780 | } | 1892 | } |
@@ -1790,7 +1902,7 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state) | |||
1790 | static int nfs41_check_open_stateid(struct nfs4_state *state) | 1902 | static int nfs41_check_open_stateid(struct nfs4_state *state) |
1791 | { | 1903 | { |
1792 | struct nfs_server *server = NFS_SERVER(state->inode); | 1904 | struct nfs_server *server = NFS_SERVER(state->inode); |
1793 | nfs4_stateid *stateid = &state->stateid; | 1905 | nfs4_stateid *stateid = &state->open_stateid; |
1794 | int status; | 1906 | int status; |
1795 | 1907 | ||
1796 | /* If a state reset has been done, test_stateid is unneeded */ | 1908 | /* If a state reset has been done, test_stateid is unneeded */ |
@@ -1896,6 +2008,10 @@ static int _nfs4_do_open(struct inode *dir, | |||
1896 | if (server->caps & NFS_CAP_POSIX_LOCK) | 2008 | if (server->caps & NFS_CAP_POSIX_LOCK) |
1897 | set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); | 2009 | set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); |
1898 | 2010 | ||
2011 | status = nfs4_opendata_access(cred, opendata, state, fmode); | ||
2012 | if (status != 0) | ||
2013 | goto err_opendata_put; | ||
2014 | |||
1899 | if (opendata->o_arg.open_flags & O_EXCL) { | 2015 | if (opendata->o_arg.open_flags & O_EXCL) { |
1900 | nfs4_exclusive_attrset(opendata, sattr); | 2016 | nfs4_exclusive_attrset(opendata, sattr); |
1901 | 2017 | ||
@@ -1941,7 +2057,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, | |||
1941 | struct nfs4_state *res; | 2057 | struct nfs4_state *res; |
1942 | int status; | 2058 | int status; |
1943 | 2059 | ||
1944 | fmode &= FMODE_READ|FMODE_WRITE; | 2060 | fmode &= FMODE_READ|FMODE_WRITE|FMODE_EXEC; |
1945 | do { | 2061 | do { |
1946 | status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, | 2062 | status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, |
1947 | &res, ctx_th); | 2063 | &res, ctx_th); |
@@ -2013,8 +2129,12 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
2013 | nfs_fattr_init(fattr); | 2129 | nfs_fattr_init(fattr); |
2014 | 2130 | ||
2015 | if (state != NULL) { | 2131 | if (state != NULL) { |
2132 | struct nfs_lockowner lockowner = { | ||
2133 | .l_owner = current->files, | ||
2134 | .l_pid = current->tgid, | ||
2135 | }; | ||
2016 | nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, | 2136 | nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, |
2017 | current->files, current->tgid); | 2137 | &lockowner); |
2018 | } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode, | 2138 | } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode, |
2019 | FMODE_WRITE)) { | 2139 | FMODE_WRITE)) { |
2020 | /* Use that stateid */ | 2140 | /* Use that stateid */ |
@@ -2133,6 +2253,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) | |||
2133 | { | 2253 | { |
2134 | struct nfs4_closedata *calldata = data; | 2254 | struct nfs4_closedata *calldata = data; |
2135 | struct nfs4_state *state = calldata->state; | 2255 | struct nfs4_state *state = calldata->state; |
2256 | struct inode *inode = calldata->inode; | ||
2136 | int call_close = 0; | 2257 | int call_close = 0; |
2137 | 2258 | ||
2138 | dprintk("%s: begin!\n", __func__); | 2259 | dprintk("%s: begin!\n", __func__); |
@@ -2166,16 +2287,13 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) | |||
2166 | if (calldata->arg.fmode == 0) { | 2287 | if (calldata->arg.fmode == 0) { |
2167 | task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; | 2288 | task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; |
2168 | if (calldata->roc && | 2289 | if (calldata->roc && |
2169 | pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) { | 2290 | pnfs_roc_drain(inode, &calldata->roc_barrier, task)) |
2170 | rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq, | ||
2171 | task, NULL); | ||
2172 | goto out; | 2291 | goto out; |
2173 | } | ||
2174 | } | 2292 | } |
2175 | 2293 | ||
2176 | nfs_fattr_init(calldata->res.fattr); | 2294 | nfs_fattr_init(calldata->res.fattr); |
2177 | calldata->timestamp = jiffies; | 2295 | calldata->timestamp = jiffies; |
2178 | if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), | 2296 | if (nfs4_setup_sequence(NFS_SERVER(inode), |
2179 | &calldata->arg.seq_args, | 2297 | &calldata->arg.seq_args, |
2180 | &calldata->res.seq_res, | 2298 | &calldata->res.seq_res, |
2181 | task)) | 2299 | task)) |
@@ -2202,7 +2320,7 @@ static const struct rpc_call_ops nfs4_close_ops = { | |||
2202 | * | 2320 | * |
2203 | * NOTE: Caller must be holding the sp->so_owner semaphore! | 2321 | * NOTE: Caller must be holding the sp->so_owner semaphore! |
2204 | */ | 2322 | */ |
2205 | int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) | 2323 | int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) |
2206 | { | 2324 | { |
2207 | struct nfs_server *server = NFS_SERVER(state->inode); | 2325 | struct nfs_server *server = NFS_SERVER(state->inode); |
2208 | struct nfs4_closedata *calldata; | 2326 | struct nfs4_closedata *calldata; |
@@ -2238,7 +2356,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) | |||
2238 | calldata->res.fattr = &calldata->fattr; | 2356 | calldata->res.fattr = &calldata->fattr; |
2239 | calldata->res.seqid = calldata->arg.seqid; | 2357 | calldata->res.seqid = calldata->arg.seqid; |
2240 | calldata->res.server = server; | 2358 | calldata->res.server = server; |
2241 | calldata->roc = roc; | 2359 | calldata->roc = pnfs_roc(state->inode); |
2242 | nfs_sb_active(calldata->inode->i_sb); | 2360 | nfs_sb_active(calldata->inode->i_sb); |
2243 | 2361 | ||
2244 | msg.rpc_argp = &calldata->arg; | 2362 | msg.rpc_argp = &calldata->arg; |
@@ -2255,8 +2373,6 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) | |||
2255 | out_free_calldata: | 2373 | out_free_calldata: |
2256 | kfree(calldata); | 2374 | kfree(calldata); |
2257 | out: | 2375 | out: |
2258 | if (roc) | ||
2259 | pnfs_roc_release(state->inode); | ||
2260 | nfs4_put_open_state(state); | 2376 | nfs4_put_open_state(state); |
2261 | nfs4_put_state_owner(sp); | 2377 | nfs4_put_state_owner(sp); |
2262 | return status; | 2378 | return status; |
@@ -2399,7 +2515,7 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl | |||
2399 | int ret; | 2515 | int ret; |
2400 | 2516 | ||
2401 | auth = rpcauth_create(flavor, server->client); | 2517 | auth = rpcauth_create(flavor, server->client); |
2402 | if (!auth) { | 2518 | if (IS_ERR(auth)) { |
2403 | ret = -EIO; | 2519 | ret = -EIO; |
2404 | goto out; | 2520 | goto out; |
2405 | } | 2521 | } |
@@ -2767,13 +2883,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry | |||
2767 | 2883 | ||
2768 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); | 2884 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
2769 | if (!status) { | 2885 | if (!status) { |
2770 | entry->mask = 0; | 2886 | nfs_access_set_mask(entry, res.access); |
2771 | if (res.access & NFS4_ACCESS_READ) | ||
2772 | entry->mask |= MAY_READ; | ||
2773 | if (res.access & (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) | ||
2774 | entry->mask |= MAY_WRITE; | ||
2775 | if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) | ||
2776 | entry->mask |= MAY_EXEC; | ||
2777 | nfs_refresh_inode(inode, res.fattr); | 2887 | nfs_refresh_inode(inode, res.fattr); |
2778 | } | 2888 | } |
2779 | nfs_free_fattr(res.fattr); | 2889 | nfs_free_fattr(res.fattr); |
@@ -3362,8 +3472,11 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s | |||
3362 | 3472 | ||
3363 | nfs_fattr_init(fsinfo->fattr); | 3473 | nfs_fattr_init(fsinfo->fattr); |
3364 | error = nfs4_do_fsinfo(server, fhandle, fsinfo); | 3474 | error = nfs4_do_fsinfo(server, fhandle, fsinfo); |
3365 | if (error == 0) | 3475 | if (error == 0) { |
3476 | /* block layout checks this! */ | ||
3477 | server->pnfs_blksize = fsinfo->blksize; | ||
3366 | set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype); | 3478 | set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype); |
3479 | } | ||
3367 | 3480 | ||
3368 | return error; | 3481 | return error; |
3369 | } | 3482 | } |
@@ -4007,6 +4120,36 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, | |||
4007 | memcpy(bootverf->data, verf, sizeof(bootverf->data)); | 4120 | memcpy(bootverf->data, verf, sizeof(bootverf->data)); |
4008 | } | 4121 | } |
4009 | 4122 | ||
4123 | static unsigned int | ||
4124 | nfs4_init_nonuniform_client_string(const struct nfs_client *clp, | ||
4125 | char *buf, size_t len) | ||
4126 | { | ||
4127 | unsigned int result; | ||
4128 | |||
4129 | rcu_read_lock(); | ||
4130 | result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s", | ||
4131 | clp->cl_ipaddr, | ||
4132 | rpc_peeraddr2str(clp->cl_rpcclient, | ||
4133 | RPC_DISPLAY_ADDR), | ||
4134 | rpc_peeraddr2str(clp->cl_rpcclient, | ||
4135 | RPC_DISPLAY_PROTO)); | ||
4136 | rcu_read_unlock(); | ||
4137 | return result; | ||
4138 | } | ||
4139 | |||
4140 | static unsigned int | ||
4141 | nfs4_init_uniform_client_string(const struct nfs_client *clp, | ||
4142 | char *buf, size_t len) | ||
4143 | { | ||
4144 | char *nodename = clp->cl_rpcclient->cl_nodename; | ||
4145 | |||
4146 | if (nfs4_client_id_uniquifier[0] != '\0') | ||
4147 | nodename = nfs4_client_id_uniquifier; | ||
4148 | return scnprintf(buf, len, "Linux NFSv%u.%u %s", | ||
4149 | clp->rpc_ops->version, clp->cl_minorversion, | ||
4150 | nodename); | ||
4151 | } | ||
4152 | |||
4010 | /** | 4153 | /** |
4011 | * nfs4_proc_setclientid - Negotiate client ID | 4154 | * nfs4_proc_setclientid - Negotiate client ID |
4012 | * @clp: state data structure | 4155 | * @clp: state data structure |
@@ -4037,15 +4180,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, | |||
4037 | 4180 | ||
4038 | /* nfs_client_id4 */ | 4181 | /* nfs_client_id4 */ |
4039 | nfs4_init_boot_verifier(clp, &sc_verifier); | 4182 | nfs4_init_boot_verifier(clp, &sc_verifier); |
4040 | rcu_read_lock(); | 4183 | if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) |
4041 | setclientid.sc_name_len = scnprintf(setclientid.sc_name, | 4184 | setclientid.sc_name_len = |
4042 | sizeof(setclientid.sc_name), "%s/%s %s", | 4185 | nfs4_init_uniform_client_string(clp, |
4043 | clp->cl_ipaddr, | 4186 | setclientid.sc_name, |
4044 | rpc_peeraddr2str(clp->cl_rpcclient, | 4187 | sizeof(setclientid.sc_name)); |
4045 | RPC_DISPLAY_ADDR), | 4188 | else |
4046 | rpc_peeraddr2str(clp->cl_rpcclient, | 4189 | setclientid.sc_name_len = |
4047 | RPC_DISPLAY_PROTO)); | 4190 | nfs4_init_nonuniform_client_string(clp, |
4191 | setclientid.sc_name, | ||
4192 | sizeof(setclientid.sc_name)); | ||
4048 | /* cb_client4 */ | 4193 | /* cb_client4 */ |
4194 | rcu_read_lock(); | ||
4049 | setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, | 4195 | setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, |
4050 | sizeof(setclientid.sc_netid), | 4196 | sizeof(setclientid.sc_netid), |
4051 | rpc_peeraddr2str(clp->cl_rpcclient, | 4197 | rpc_peeraddr2str(clp->cl_rpcclient, |
@@ -4391,7 +4537,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) | |||
4391 | 4537 | ||
4392 | if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) | 4538 | if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) |
4393 | return; | 4539 | return; |
4394 | if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) { | 4540 | if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { |
4395 | /* Note: exit _without_ running nfs4_locku_done */ | 4541 | /* Note: exit _without_ running nfs4_locku_done */ |
4396 | task->tk_action = NULL; | 4542 | task->tk_action = NULL; |
4397 | return; | 4543 | return; |
@@ -4585,7 +4731,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) | |||
4585 | } | 4731 | } |
4586 | if (data->rpc_status == 0) { | 4732 | if (data->rpc_status == 0) { |
4587 | nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid); | 4733 | nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid); |
4588 | data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; | 4734 | set_bit(NFS_LOCK_INITIALIZED, &data->lsp->ls_flags); |
4589 | renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); | 4735 | renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); |
4590 | } | 4736 | } |
4591 | out: | 4737 | out: |
@@ -4632,7 +4778,7 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_ | |||
4632 | case -NFS4ERR_BAD_STATEID: | 4778 | case -NFS4ERR_BAD_STATEID: |
4633 | lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; | 4779 | lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; |
4634 | if (new_lock_owner != 0 || | 4780 | if (new_lock_owner != 0 || |
4635 | (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) | 4781 | test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) |
4636 | nfs4_schedule_stateid_recovery(server, lsp->ls_state); | 4782 | nfs4_schedule_stateid_recovery(server, lsp->ls_state); |
4637 | break; | 4783 | break; |
4638 | case -NFS4ERR_STALE_STATEID: | 4784 | case -NFS4ERR_STALE_STATEID: |
@@ -4756,7 +4902,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) | |||
4756 | struct nfs_server *server = NFS_SERVER(state->inode); | 4902 | struct nfs_server *server = NFS_SERVER(state->inode); |
4757 | 4903 | ||
4758 | list_for_each_entry(lsp, &state->lock_states, ls_locks) { | 4904 | list_for_each_entry(lsp, &state->lock_states, ls_locks) { |
4759 | if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { | 4905 | if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { |
4760 | status = nfs41_test_stateid(server, &lsp->ls_stateid); | 4906 | status = nfs41_test_stateid(server, &lsp->ls_stateid); |
4761 | if (status != NFS_OK) { | 4907 | if (status != NFS_OK) { |
4762 | /* Free the stateid unless the server | 4908 | /* Free the stateid unless the server |
@@ -4764,7 +4910,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) | |||
4764 | if (status != -NFS4ERR_BAD_STATEID) | 4910 | if (status != -NFS4ERR_BAD_STATEID) |
4765 | nfs41_free_stateid(server, | 4911 | nfs41_free_stateid(server, |
4766 | &lsp->ls_stateid); | 4912 | &lsp->ls_stateid); |
4767 | lsp->ls_flags &= ~NFS_LOCK_INITIALIZED; | 4913 | clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); |
4768 | ret = status; | 4914 | ret = status; |
4769 | } | 4915 | } |
4770 | } | 4916 | } |
@@ -5267,10 +5413,8 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) | |||
5267 | }; | 5413 | }; |
5268 | 5414 | ||
5269 | nfs4_init_boot_verifier(clp, &verifier); | 5415 | nfs4_init_boot_verifier(clp, &verifier); |
5270 | args.id_len = scnprintf(args.id, sizeof(args.id), | 5416 | args.id_len = nfs4_init_uniform_client_string(clp, args.id, |
5271 | "%s/%s", | 5417 | sizeof(args.id)); |
5272 | clp->cl_ipaddr, | ||
5273 | clp->cl_rpcclient->cl_nodename); | ||
5274 | dprintk("NFS call exchange_id auth=%s, '%.*s'\n", | 5418 | dprintk("NFS call exchange_id auth=%s, '%.*s'\n", |
5275 | clp->cl_rpcclient->cl_auth->au_ops->au_name, | 5419 | clp->cl_rpcclient->cl_auth->au_ops->au_name, |
5276 | args.id_len, args.id); | 5420 | args.id_len, args.id); |
@@ -5391,6 +5535,8 @@ int nfs4_destroy_clientid(struct nfs_client *clp) | |||
5391 | goto out; | 5535 | goto out; |
5392 | if (clp->cl_exchange_flags == 0) | 5536 | if (clp->cl_exchange_flags == 0) |
5393 | goto out; | 5537 | goto out; |
5538 | if (clp->cl_preserve_clid) | ||
5539 | goto out; | ||
5394 | cred = nfs4_get_exchange_id_cred(clp); | 5540 | cred = nfs4_get_exchange_id_cred(clp); |
5395 | ret = nfs4_proc_destroy_clientid(clp, cred); | 5541 | ret = nfs4_proc_destroy_clientid(clp, cred); |
5396 | if (cred) | 5542 | if (cred) |
@@ -6196,26 +6342,44 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) | |||
6196 | static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) | 6342 | static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) |
6197 | { | 6343 | { |
6198 | struct nfs4_layoutget *lgp = calldata; | 6344 | struct nfs4_layoutget *lgp = calldata; |
6199 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); | 6345 | struct inode *inode = lgp->args.inode; |
6346 | struct nfs_server *server = NFS_SERVER(inode); | ||
6347 | struct pnfs_layout_hdr *lo; | ||
6348 | struct nfs4_state *state = NULL; | ||
6200 | 6349 | ||
6201 | dprintk("--> %s\n", __func__); | 6350 | dprintk("--> %s\n", __func__); |
6202 | 6351 | ||
6203 | if (!nfs4_sequence_done(task, &lgp->res.seq_res)) | 6352 | if (!nfs4_sequence_done(task, &lgp->res.seq_res)) |
6204 | return; | 6353 | goto out; |
6205 | 6354 | ||
6206 | switch (task->tk_status) { | 6355 | switch (task->tk_status) { |
6207 | case 0: | 6356 | case 0: |
6208 | break; | 6357 | goto out; |
6209 | case -NFS4ERR_LAYOUTTRYLATER: | 6358 | case -NFS4ERR_LAYOUTTRYLATER: |
6210 | case -NFS4ERR_RECALLCONFLICT: | 6359 | case -NFS4ERR_RECALLCONFLICT: |
6211 | task->tk_status = -NFS4ERR_DELAY; | 6360 | task->tk_status = -NFS4ERR_DELAY; |
6212 | /* Fall through */ | 6361 | break; |
6213 | default: | 6362 | case -NFS4ERR_EXPIRED: |
6214 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { | 6363 | case -NFS4ERR_BAD_STATEID: |
6215 | rpc_restart_call_prepare(task); | 6364 | spin_lock(&inode->i_lock); |
6216 | return; | 6365 | lo = NFS_I(inode)->layout; |
6366 | if (!lo || list_empty(&lo->plh_segs)) { | ||
6367 | spin_unlock(&inode->i_lock); | ||
6368 | /* If the open stateid was bad, then recover it. */ | ||
6369 | state = lgp->args.ctx->state; | ||
6370 | } else { | ||
6371 | LIST_HEAD(head); | ||
6372 | |||
6373 | pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); | ||
6374 | spin_unlock(&inode->i_lock); | ||
6375 | /* Mark the bad layout state as invalid, then | ||
6376 | * retry using the open stateid. */ | ||
6377 | pnfs_free_lseg_list(&head); | ||
6217 | } | 6378 | } |
6218 | } | 6379 | } |
6380 | if (nfs4_async_handle_error(task, server, state) == -EAGAIN) | ||
6381 | rpc_restart_call_prepare(task); | ||
6382 | out: | ||
6219 | dprintk("<-- %s\n", __func__); | 6383 | dprintk("<-- %s\n", __func__); |
6220 | } | 6384 | } |
6221 | 6385 | ||
@@ -6282,7 +6446,8 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { | |||
6282 | .rpc_release = nfs4_layoutget_release, | 6446 | .rpc_release = nfs4_layoutget_release, |
6283 | }; | 6447 | }; |
6284 | 6448 | ||
6285 | void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) | 6449 | struct pnfs_layout_segment * |
6450 | nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) | ||
6286 | { | 6451 | { |
6287 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); | 6452 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); |
6288 | size_t max_pages = max_response_pages(server); | 6453 | size_t max_pages = max_response_pages(server); |
@@ -6299,6 +6464,7 @@ void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) | |||
6299 | .callback_data = lgp, | 6464 | .callback_data = lgp, |
6300 | .flags = RPC_TASK_ASYNC, | 6465 | .flags = RPC_TASK_ASYNC, |
6301 | }; | 6466 | }; |
6467 | struct pnfs_layout_segment *lseg = NULL; | ||
6302 | int status = 0; | 6468 | int status = 0; |
6303 | 6469 | ||
6304 | dprintk("--> %s\n", __func__); | 6470 | dprintk("--> %s\n", __func__); |
@@ -6306,7 +6472,7 @@ void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) | |||
6306 | lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); | 6472 | lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); |
6307 | if (!lgp->args.layout.pages) { | 6473 | if (!lgp->args.layout.pages) { |
6308 | nfs4_layoutget_release(lgp); | 6474 | nfs4_layoutget_release(lgp); |
6309 | return; | 6475 | return ERR_PTR(-ENOMEM); |
6310 | } | 6476 | } |
6311 | lgp->args.layout.pglen = max_pages * PAGE_SIZE; | 6477 | lgp->args.layout.pglen = max_pages * PAGE_SIZE; |
6312 | 6478 | ||
@@ -6315,15 +6481,17 @@ void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) | |||
6315 | nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); | 6481 | nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); |
6316 | task = rpc_run_task(&task_setup_data); | 6482 | task = rpc_run_task(&task_setup_data); |
6317 | if (IS_ERR(task)) | 6483 | if (IS_ERR(task)) |
6318 | return; | 6484 | return ERR_CAST(task); |
6319 | status = nfs4_wait_for_completion_rpc_task(task); | 6485 | status = nfs4_wait_for_completion_rpc_task(task); |
6320 | if (status == 0) | 6486 | if (status == 0) |
6321 | status = task->tk_status; | 6487 | status = task->tk_status; |
6322 | if (status == 0) | 6488 | if (status == 0) |
6323 | status = pnfs_layout_process(lgp); | 6489 | lseg = pnfs_layout_process(lgp); |
6324 | rpc_put_task(task); | 6490 | rpc_put_task(task); |
6325 | dprintk("<-- %s status=%d\n", __func__, status); | 6491 | dprintk("<-- %s status=%d\n", __func__, status); |
6326 | return; | 6492 | if (status) |
6493 | return ERR_PTR(status); | ||
6494 | return lseg; | ||
6327 | } | 6495 | } |
6328 | 6496 | ||
6329 | static void | 6497 | static void |
@@ -6342,7 +6510,6 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | |||
6342 | { | 6510 | { |
6343 | struct nfs4_layoutreturn *lrp = calldata; | 6511 | struct nfs4_layoutreturn *lrp = calldata; |
6344 | struct nfs_server *server; | 6512 | struct nfs_server *server; |
6345 | struct pnfs_layout_hdr *lo = lrp->args.layout; | ||
6346 | 6513 | ||
6347 | dprintk("--> %s\n", __func__); | 6514 | dprintk("--> %s\n", __func__); |
6348 | 6515 | ||
@@ -6354,20 +6521,21 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | |||
6354 | rpc_restart_call_prepare(task); | 6521 | rpc_restart_call_prepare(task); |
6355 | return; | 6522 | return; |
6356 | } | 6523 | } |
6357 | spin_lock(&lo->plh_inode->i_lock); | ||
6358 | if (task->tk_status == 0 && lrp->res.lrs_present) | ||
6359 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); | ||
6360 | lo->plh_block_lgets--; | ||
6361 | spin_unlock(&lo->plh_inode->i_lock); | ||
6362 | dprintk("<-- %s\n", __func__); | 6524 | dprintk("<-- %s\n", __func__); |
6363 | } | 6525 | } |
6364 | 6526 | ||
6365 | static void nfs4_layoutreturn_release(void *calldata) | 6527 | static void nfs4_layoutreturn_release(void *calldata) |
6366 | { | 6528 | { |
6367 | struct nfs4_layoutreturn *lrp = calldata; | 6529 | struct nfs4_layoutreturn *lrp = calldata; |
6530 | struct pnfs_layout_hdr *lo = lrp->args.layout; | ||
6368 | 6531 | ||
6369 | dprintk("--> %s\n", __func__); | 6532 | dprintk("--> %s\n", __func__); |
6370 | put_layout_hdr(lrp->args.layout); | 6533 | spin_lock(&lo->plh_inode->i_lock); |
6534 | if (lrp->res.lrs_present) | ||
6535 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); | ||
6536 | lo->plh_block_lgets--; | ||
6537 | spin_unlock(&lo->plh_inode->i_lock); | ||
6538 | pnfs_put_layout_hdr(lrp->args.layout); | ||
6371 | kfree(calldata); | 6539 | kfree(calldata); |
6372 | dprintk("<-- %s\n", __func__); | 6540 | dprintk("<-- %s\n", __func__); |
6373 | } | 6541 | } |
@@ -6541,7 +6709,7 @@ static void nfs4_layoutcommit_release(void *calldata) | |||
6541 | list_del_init(&lseg->pls_lc_list); | 6709 | list_del_init(&lseg->pls_lc_list); |
6542 | if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, | 6710 | if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, |
6543 | &lseg->pls_flags)) | 6711 | &lseg->pls_flags)) |
6544 | put_lseg(lseg); | 6712 | pnfs_put_lseg(lseg); |
6545 | } | 6713 | } |
6546 | 6714 | ||
6547 | clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); | 6715 | clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); |
@@ -6800,6 +6968,7 @@ static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { | |||
6800 | .recover_lock = nfs4_lock_reclaim, | 6968 | .recover_lock = nfs4_lock_reclaim, |
6801 | .establish_clid = nfs4_init_clientid, | 6969 | .establish_clid = nfs4_init_clientid, |
6802 | .get_clid_cred = nfs4_get_setclientid_cred, | 6970 | .get_clid_cred = nfs4_get_setclientid_cred, |
6971 | .detect_trunking = nfs40_discover_server_trunking, | ||
6803 | }; | 6972 | }; |
6804 | 6973 | ||
6805 | #if defined(CONFIG_NFS_V4_1) | 6974 | #if defined(CONFIG_NFS_V4_1) |
@@ -6811,6 +6980,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { | |||
6811 | .establish_clid = nfs41_init_clientid, | 6980 | .establish_clid = nfs41_init_clientid, |
6812 | .get_clid_cred = nfs4_get_exchange_id_cred, | 6981 | .get_clid_cred = nfs4_get_exchange_id_cred, |
6813 | .reclaim_complete = nfs41_proc_reclaim_complete, | 6982 | .reclaim_complete = nfs41_proc_reclaim_complete, |
6983 | .detect_trunking = nfs41_discover_server_trunking, | ||
6814 | }; | 6984 | }; |
6815 | #endif /* CONFIG_NFS_V4_1 */ | 6985 | #endif /* CONFIG_NFS_V4_1 */ |
6816 | 6986 | ||
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 55148def5540..c351e6b39838 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -51,18 +51,21 @@ | |||
51 | #include <linux/bitops.h> | 51 | #include <linux/bitops.h> |
52 | #include <linux/jiffies.h> | 52 | #include <linux/jiffies.h> |
53 | 53 | ||
54 | #include <linux/sunrpc/clnt.h> | ||
55 | |||
54 | #include "nfs4_fs.h" | 56 | #include "nfs4_fs.h" |
55 | #include "callback.h" | 57 | #include "callback.h" |
56 | #include "delegation.h" | 58 | #include "delegation.h" |
57 | #include "internal.h" | 59 | #include "internal.h" |
58 | #include "pnfs.h" | 60 | #include "pnfs.h" |
61 | #include "netns.h" | ||
59 | 62 | ||
60 | #define NFSDBG_FACILITY NFSDBG_STATE | 63 | #define NFSDBG_FACILITY NFSDBG_STATE |
61 | 64 | ||
62 | #define OPENOWNER_POOL_SIZE 8 | 65 | #define OPENOWNER_POOL_SIZE 8 |
63 | 66 | ||
64 | const nfs4_stateid zero_stateid; | 67 | const nfs4_stateid zero_stateid; |
65 | 68 | static DEFINE_MUTEX(nfs_clid_init_mutex); | |
66 | static LIST_HEAD(nfs4_clientid_list); | 69 | static LIST_HEAD(nfs4_clientid_list); |
67 | 70 | ||
68 | int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) | 71 | int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) |
@@ -73,12 +76,13 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) | |||
73 | }; | 76 | }; |
74 | unsigned short port; | 77 | unsigned short port; |
75 | int status; | 78 | int status; |
79 | struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); | ||
76 | 80 | ||
77 | if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) | 81 | if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) |
78 | goto do_confirm; | 82 | goto do_confirm; |
79 | port = nfs_callback_tcpport; | 83 | port = nn->nfs_callback_tcpport; |
80 | if (clp->cl_addr.ss_family == AF_INET6) | 84 | if (clp->cl_addr.ss_family == AF_INET6) |
81 | port = nfs_callback_tcpport6; | 85 | port = nn->nfs_callback_tcpport6; |
82 | 86 | ||
83 | status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); | 87 | status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); |
84 | if (status != 0) | 88 | if (status != 0) |
@@ -96,6 +100,56 @@ out: | |||
96 | return status; | 100 | return status; |
97 | } | 101 | } |
98 | 102 | ||
103 | /** | ||
104 | * nfs40_discover_server_trunking - Detect server IP address trunking (mv0) | ||
105 | * | ||
106 | * @clp: nfs_client under test | ||
107 | * @result: OUT: found nfs_client, or clp | ||
108 | * @cred: credential to use for trunking test | ||
109 | * | ||
110 | * Returns zero, a negative errno, or a negative NFS4ERR status. | ||
111 | * If zero is returned, an nfs_client pointer is planted in | ||
112 | * "result". | ||
113 | * | ||
114 | * Note: The returned client may not yet be marked ready. | ||
115 | */ | ||
116 | int nfs40_discover_server_trunking(struct nfs_client *clp, | ||
117 | struct nfs_client **result, | ||
118 | struct rpc_cred *cred) | ||
119 | { | ||
120 | struct nfs4_setclientid_res clid = { | ||
121 | .clientid = clp->cl_clientid, | ||
122 | .confirm = clp->cl_confirm, | ||
123 | }; | ||
124 | struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); | ||
125 | unsigned short port; | ||
126 | int status; | ||
127 | |||
128 | port = nn->nfs_callback_tcpport; | ||
129 | if (clp->cl_addr.ss_family == AF_INET6) | ||
130 | port = nn->nfs_callback_tcpport6; | ||
131 | |||
132 | status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); | ||
133 | if (status != 0) | ||
134 | goto out; | ||
135 | clp->cl_clientid = clid.clientid; | ||
136 | clp->cl_confirm = clid.confirm; | ||
137 | |||
138 | status = nfs40_walk_client_list(clp, result, cred); | ||
139 | switch (status) { | ||
140 | case -NFS4ERR_STALE_CLIENTID: | ||
141 | set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); | ||
142 | case 0: | ||
143 | /* Sustain the lease, even if it's empty. If the clientid4 | ||
144 | * goes stale it's of no use for trunking discovery. */ | ||
145 | nfs4_schedule_state_renewal(*result); | ||
146 | break; | ||
147 | } | ||
148 | |||
149 | out: | ||
150 | return status; | ||
151 | } | ||
152 | |||
99 | struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) | 153 | struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) |
100 | { | 154 | { |
101 | struct rpc_cred *cred = NULL; | 155 | struct rpc_cred *cred = NULL; |
@@ -275,6 +329,33 @@ out: | |||
275 | return status; | 329 | return status; |
276 | } | 330 | } |
277 | 331 | ||
332 | /** | ||
333 | * nfs41_discover_server_trunking - Detect server IP address trunking (mv1) | ||
334 | * | ||
335 | * @clp: nfs_client under test | ||
336 | * @result: OUT: found nfs_client, or clp | ||
337 | * @cred: credential to use for trunking test | ||
338 | * | ||
339 | * Returns NFS4_OK, a negative errno, or a negative NFS4ERR status. | ||
340 | * If NFS4_OK is returned, an nfs_client pointer is planted in | ||
341 | * "result". | ||
342 | * | ||
343 | * Note: The returned client may not yet be marked ready. | ||
344 | */ | ||
345 | int nfs41_discover_server_trunking(struct nfs_client *clp, | ||
346 | struct nfs_client **result, | ||
347 | struct rpc_cred *cred) | ||
348 | { | ||
349 | int status; | ||
350 | |||
351 | status = nfs4_proc_exchange_id(clp, cred); | ||
352 | if (status != NFS4_OK) | ||
353 | return status; | ||
354 | set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); | ||
355 | |||
356 | return nfs41_walk_client_list(clp, result, cred); | ||
357 | } | ||
358 | |||
278 | struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) | 359 | struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) |
279 | { | 360 | { |
280 | struct rpc_cred *cred; | 361 | struct rpc_cred *cred; |
@@ -729,11 +810,8 @@ static void __nfs4_close(struct nfs4_state *state, | |||
729 | if (!call_close) { | 810 | if (!call_close) { |
730 | nfs4_put_open_state(state); | 811 | nfs4_put_open_state(state); |
731 | nfs4_put_state_owner(owner); | 812 | nfs4_put_state_owner(owner); |
732 | } else { | 813 | } else |
733 | bool roc = pnfs_roc(state->inode); | 814 | nfs4_do_close(state, gfp_mask, wait); |
734 | |||
735 | nfs4_do_close(state, gfp_mask, wait, roc); | ||
736 | } | ||
737 | } | 815 | } |
738 | 816 | ||
739 | void nfs4_close_state(struct nfs4_state *state, fmode_t fmode) | 817 | void nfs4_close_state(struct nfs4_state *state, fmode_t fmode) |
@@ -865,7 +943,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) | |||
865 | if (list_empty(&state->lock_states)) | 943 | if (list_empty(&state->lock_states)) |
866 | clear_bit(LK_STATE_IN_USE, &state->flags); | 944 | clear_bit(LK_STATE_IN_USE, &state->flags); |
867 | spin_unlock(&state->state_lock); | 945 | spin_unlock(&state->state_lock); |
868 | if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { | 946 | if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { |
869 | if (nfs4_release_lockowner(lsp) == 0) | 947 | if (nfs4_release_lockowner(lsp) == 0) |
870 | return; | 948 | return; |
871 | } | 949 | } |
@@ -911,17 +989,25 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) | |||
911 | } | 989 | } |
912 | 990 | ||
913 | static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, | 991 | static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, |
914 | fl_owner_t fl_owner, pid_t fl_pid) | 992 | const struct nfs_lockowner *lockowner) |
915 | { | 993 | { |
916 | struct nfs4_lock_state *lsp; | 994 | struct nfs4_lock_state *lsp; |
995 | fl_owner_t fl_owner; | ||
996 | pid_t fl_pid; | ||
917 | bool ret = false; | 997 | bool ret = false; |
918 | 998 | ||
999 | |||
1000 | if (lockowner == NULL) | ||
1001 | goto out; | ||
1002 | |||
919 | if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) | 1003 | if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) |
920 | goto out; | 1004 | goto out; |
921 | 1005 | ||
1006 | fl_owner = lockowner->l_owner; | ||
1007 | fl_pid = lockowner->l_pid; | ||
922 | spin_lock(&state->state_lock); | 1008 | spin_lock(&state->state_lock); |
923 | lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); | 1009 | lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); |
924 | if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { | 1010 | if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { |
925 | nfs4_stateid_copy(dst, &lsp->ls_stateid); | 1011 | nfs4_stateid_copy(dst, &lsp->ls_stateid); |
926 | ret = true; | 1012 | ret = true; |
927 | } | 1013 | } |
@@ -946,11 +1032,11 @@ static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) | |||
946 | * requests. | 1032 | * requests. |
947 | */ | 1033 | */ |
948 | void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, | 1034 | void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, |
949 | fmode_t fmode, fl_owner_t fl_owner, pid_t fl_pid) | 1035 | fmode_t fmode, const struct nfs_lockowner *lockowner) |
950 | { | 1036 | { |
951 | if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) | 1037 | if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) |
952 | return; | 1038 | return; |
953 | if (nfs4_copy_lock_stateid(dst, state, fl_owner, fl_pid)) | 1039 | if (nfs4_copy_lock_stateid(dst, state, lockowner)) |
954 | return; | 1040 | return; |
955 | nfs4_copy_open_stateid(dst, state); | 1041 | nfs4_copy_open_stateid(dst, state); |
956 | } | 1042 | } |
@@ -1289,7 +1375,7 @@ restart: | |||
1289 | if (status >= 0) { | 1375 | if (status >= 0) { |
1290 | spin_lock(&state->state_lock); | 1376 | spin_lock(&state->state_lock); |
1291 | list_for_each_entry(lock, &state->lock_states, ls_locks) { | 1377 | list_for_each_entry(lock, &state->lock_states, ls_locks) { |
1292 | if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) | 1378 | if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags)) |
1293 | pr_warn_ratelimited("NFS: " | 1379 | pr_warn_ratelimited("NFS: " |
1294 | "%s: Lock reclaim " | 1380 | "%s: Lock reclaim " |
1295 | "failed!\n", __func__); | 1381 | "failed!\n", __func__); |
@@ -1361,7 +1447,7 @@ static void nfs4_clear_open_state(struct nfs4_state *state) | |||
1361 | spin_lock(&state->state_lock); | 1447 | spin_lock(&state->state_lock); |
1362 | list_for_each_entry(lock, &state->lock_states, ls_locks) { | 1448 | list_for_each_entry(lock, &state->lock_states, ls_locks) { |
1363 | lock->ls_seqid.flags = 0; | 1449 | lock->ls_seqid.flags = 0; |
1364 | lock->ls_flags &= ~NFS_LOCK_INITIALIZED; | 1450 | clear_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags); |
1365 | } | 1451 | } |
1366 | spin_unlock(&state->state_lock); | 1452 | spin_unlock(&state->state_lock); |
1367 | } | 1453 | } |
@@ -1595,8 +1681,8 @@ out: | |||
1595 | return nfs4_recovery_handle_error(clp, status); | 1681 | return nfs4_recovery_handle_error(clp, status); |
1596 | } | 1682 | } |
1597 | 1683 | ||
1598 | /* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors | 1684 | /* Set NFS4CLNT_LEASE_EXPIRED and reclaim reboot state for all v4.0 errors |
1599 | * on EXCHANGE_ID for v4.1 | 1685 | * and for recoverable errors on EXCHANGE_ID for v4.1 |
1600 | */ | 1686 | */ |
1601 | static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) | 1687 | static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) |
1602 | { | 1688 | { |
@@ -1606,8 +1692,12 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) | |||
1606 | return -ESERVERFAULT; | 1692 | return -ESERVERFAULT; |
1607 | /* Lease confirmation error: retry after purging the lease */ | 1693 | /* Lease confirmation error: retry after purging the lease */ |
1608 | ssleep(1); | 1694 | ssleep(1); |
1695 | clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); | ||
1696 | break; | ||
1609 | case -NFS4ERR_STALE_CLIENTID: | 1697 | case -NFS4ERR_STALE_CLIENTID: |
1610 | clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); | 1698 | clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); |
1699 | nfs4_state_clear_reclaim_reboot(clp); | ||
1700 | nfs4_state_start_reclaim_reboot(clp); | ||
1611 | break; | 1701 | break; |
1612 | case -NFS4ERR_CLID_INUSE: | 1702 | case -NFS4ERR_CLID_INUSE: |
1613 | pr_err("NFS: Server %s reports our clientid is in use\n", | 1703 | pr_err("NFS: Server %s reports our clientid is in use\n", |
@@ -1698,6 +1788,109 @@ static int nfs4_purge_lease(struct nfs_client *clp) | |||
1698 | return 0; | 1788 | return 0; |
1699 | } | 1789 | } |
1700 | 1790 | ||
1791 | /** | ||
1792 | * nfs4_discover_server_trunking - Detect server IP address trunking | ||
1793 | * | ||
1794 | * @clp: nfs_client under test | ||
1795 | * @result: OUT: found nfs_client, or clp | ||
1796 | * | ||
1797 | * Returns zero or a negative errno. If zero is returned, | ||
1798 | * an nfs_client pointer is planted in "result". | ||
1799 | * | ||
1800 | * Note: since we are invoked in process context, and | ||
1801 | * not from inside the state manager, we cannot use | ||
1802 | * nfs4_handle_reclaim_lease_error(). | ||
1803 | */ | ||
1804 | int nfs4_discover_server_trunking(struct nfs_client *clp, | ||
1805 | struct nfs_client **result) | ||
1806 | { | ||
1807 | const struct nfs4_state_recovery_ops *ops = | ||
1808 | clp->cl_mvops->reboot_recovery_ops; | ||
1809 | rpc_authflavor_t *flavors, flav, save; | ||
1810 | struct rpc_clnt *clnt; | ||
1811 | struct rpc_cred *cred; | ||
1812 | int i, len, status; | ||
1813 | |||
1814 | dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname); | ||
1815 | |||
1816 | len = NFS_MAX_SECFLAVORS; | ||
1817 | flavors = kcalloc(len, sizeof(*flavors), GFP_KERNEL); | ||
1818 | if (flavors == NULL) { | ||
1819 | status = -ENOMEM; | ||
1820 | goto out; | ||
1821 | } | ||
1822 | len = rpcauth_list_flavors(flavors, len); | ||
1823 | if (len < 0) { | ||
1824 | status = len; | ||
1825 | goto out_free; | ||
1826 | } | ||
1827 | clnt = clp->cl_rpcclient; | ||
1828 | save = clnt->cl_auth->au_flavor; | ||
1829 | i = 0; | ||
1830 | |||
1831 | mutex_lock(&nfs_clid_init_mutex); | ||
1832 | status = -ENOENT; | ||
1833 | again: | ||
1834 | cred = ops->get_clid_cred(clp); | ||
1835 | if (cred == NULL) | ||
1836 | goto out_unlock; | ||
1837 | |||
1838 | status = ops->detect_trunking(clp, result, cred); | ||
1839 | put_rpccred(cred); | ||
1840 | switch (status) { | ||
1841 | case 0: | ||
1842 | break; | ||
1843 | |||
1844 | case -EACCES: | ||
1845 | if (clp->cl_machine_cred == NULL) | ||
1846 | break; | ||
1847 | /* Handle case where the user hasn't set up machine creds */ | ||
1848 | nfs4_clear_machine_cred(clp); | ||
1849 | case -NFS4ERR_DELAY: | ||
1850 | case -ETIMEDOUT: | ||
1851 | case -EAGAIN: | ||
1852 | ssleep(1); | ||
1853 | dprintk("NFS: %s after status %d, retrying\n", | ||
1854 | __func__, status); | ||
1855 | goto again; | ||
1856 | |||
1857 | case -NFS4ERR_CLID_INUSE: | ||
1858 | case -NFS4ERR_WRONGSEC: | ||
1859 | status = -EPERM; | ||
1860 | if (i >= len) | ||
1861 | break; | ||
1862 | |||
1863 | flav = flavors[i++]; | ||
1864 | if (flav == save) | ||
1865 | flav = flavors[i++]; | ||
1866 | clnt = rpc_clone_client_set_auth(clnt, flav); | ||
1867 | if (IS_ERR(clnt)) { | ||
1868 | status = PTR_ERR(clnt); | ||
1869 | break; | ||
1870 | } | ||
1871 | clp->cl_rpcclient = clnt; | ||
1872 | goto again; | ||
1873 | |||
1874 | case -NFS4ERR_MINOR_VERS_MISMATCH: | ||
1875 | status = -EPROTONOSUPPORT; | ||
1876 | break; | ||
1877 | |||
1878 | case -EKEYEXPIRED: | ||
1879 | nfs4_warn_keyexpired(clp->cl_hostname); | ||
1880 | case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery | ||
1881 | * in nfs4_exchange_id */ | ||
1882 | status = -EKEYEXPIRED; | ||
1883 | } | ||
1884 | |||
1885 | out_unlock: | ||
1886 | mutex_unlock(&nfs_clid_init_mutex); | ||
1887 | out_free: | ||
1888 | kfree(flavors); | ||
1889 | out: | ||
1890 | dprintk("NFS: %s: status = %d\n", __func__, status); | ||
1891 | return status; | ||
1892 | } | ||
1893 | |||
1701 | #ifdef CONFIG_NFS_V4_1 | 1894 | #ifdef CONFIG_NFS_V4_1 |
1702 | void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) | 1895 | void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) |
1703 | { | 1896 | { |
@@ -2008,6 +2201,7 @@ out_error: | |||
2008 | pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s" | 2201 | pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s" |
2009 | " with error %d\n", section_sep, section, | 2202 | " with error %d\n", section_sep, section, |
2010 | clp->cl_hostname, -status); | 2203 | clp->cl_hostname, -status); |
2204 | ssleep(1); | ||
2011 | nfs4_end_drain_session(clp); | 2205 | nfs4_end_drain_session(clp); |
2012 | nfs4_clear_state_manager_bit(clp); | 2206 | nfs4_clear_state_manager_bit(clp); |
2013 | } | 2207 | } |
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c index 5729bc8aa75d..2628d921b7e3 100644 --- a/fs/nfs/nfs4sysctl.c +++ b/fs/nfs/nfs4sysctl.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/nfs_idmap.h> | 9 | #include <linux/nfs_idmap.h> |
10 | #include <linux/nfs_fs.h> | 10 | #include <linux/nfs_fs.h> |
11 | 11 | ||
12 | #include "nfs4_fs.h" | ||
12 | #include "callback.h" | 13 | #include "callback.h" |
13 | 14 | ||
14 | static const int nfs_set_port_min = 0; | 15 | static const int nfs_set_port_min = 0; |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 8dba6bd48557..40836ee5dc3a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -447,12 +447,14 @@ static int nfs4_stat_to_errno(int); | |||
447 | encode_sequence_maxsz + \ | 447 | encode_sequence_maxsz + \ |
448 | encode_putfh_maxsz + \ | 448 | encode_putfh_maxsz + \ |
449 | encode_open_maxsz + \ | 449 | encode_open_maxsz + \ |
450 | encode_access_maxsz + \ | ||
450 | encode_getfh_maxsz + \ | 451 | encode_getfh_maxsz + \ |
451 | encode_getattr_maxsz) | 452 | encode_getattr_maxsz) |
452 | #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ | 453 | #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ |
453 | decode_sequence_maxsz + \ | 454 | decode_sequence_maxsz + \ |
454 | decode_putfh_maxsz + \ | 455 | decode_putfh_maxsz + \ |
455 | decode_open_maxsz + \ | 456 | decode_open_maxsz + \ |
457 | decode_access_maxsz + \ | ||
456 | decode_getfh_maxsz + \ | 458 | decode_getfh_maxsz + \ |
457 | decode_getattr_maxsz) | 459 | decode_getattr_maxsz) |
458 | #define NFS4_enc_open_confirm_sz \ | 460 | #define NFS4_enc_open_confirm_sz \ |
@@ -467,11 +469,13 @@ static int nfs4_stat_to_errno(int); | |||
467 | encode_sequence_maxsz + \ | 469 | encode_sequence_maxsz + \ |
468 | encode_putfh_maxsz + \ | 470 | encode_putfh_maxsz + \ |
469 | encode_open_maxsz + \ | 471 | encode_open_maxsz + \ |
472 | encode_access_maxsz + \ | ||
470 | encode_getattr_maxsz) | 473 | encode_getattr_maxsz) |
471 | #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ | 474 | #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ |
472 | decode_sequence_maxsz + \ | 475 | decode_sequence_maxsz + \ |
473 | decode_putfh_maxsz + \ | 476 | decode_putfh_maxsz + \ |
474 | decode_open_maxsz + \ | 477 | decode_open_maxsz + \ |
478 | decode_access_maxsz + \ | ||
475 | decode_getattr_maxsz) | 479 | decode_getattr_maxsz) |
476 | #define NFS4_enc_open_downgrade_sz \ | 480 | #define NFS4_enc_open_downgrade_sz \ |
477 | (compound_encode_hdr_maxsz + \ | 481 | (compound_encode_hdr_maxsz + \ |
@@ -1509,8 +1513,12 @@ static void encode_open_stateid(struct xdr_stream *xdr, | |||
1509 | nfs4_stateid stateid; | 1513 | nfs4_stateid stateid; |
1510 | 1514 | ||
1511 | if (ctx->state != NULL) { | 1515 | if (ctx->state != NULL) { |
1516 | const struct nfs_lockowner *lockowner = NULL; | ||
1517 | |||
1518 | if (l_ctx != NULL) | ||
1519 | lockowner = &l_ctx->lockowner; | ||
1512 | nfs4_select_rw_stateid(&stateid, ctx->state, | 1520 | nfs4_select_rw_stateid(&stateid, ctx->state, |
1513 | fmode, l_ctx->lockowner, l_ctx->pid); | 1521 | fmode, lockowner); |
1514 | if (zero_seqid) | 1522 | if (zero_seqid) |
1515 | stateid.seqid = 0; | 1523 | stateid.seqid = 0; |
1516 | encode_nfs4_stateid(xdr, &stateid); | 1524 | encode_nfs4_stateid(xdr, &stateid); |
@@ -2216,6 +2224,8 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
2216 | encode_putfh(xdr, args->fh, &hdr); | 2224 | encode_putfh(xdr, args->fh, &hdr); |
2217 | encode_open(xdr, args, &hdr); | 2225 | encode_open(xdr, args, &hdr); |
2218 | encode_getfh(xdr, &hdr); | 2226 | encode_getfh(xdr, &hdr); |
2227 | if (args->access) | ||
2228 | encode_access(xdr, args->access, &hdr); | ||
2219 | encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); | 2229 | encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); |
2220 | encode_nops(&hdr); | 2230 | encode_nops(&hdr); |
2221 | } | 2231 | } |
@@ -2252,7 +2262,9 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, | |||
2252 | encode_sequence(xdr, &args->seq_args, &hdr); | 2262 | encode_sequence(xdr, &args->seq_args, &hdr); |
2253 | encode_putfh(xdr, args->fh, &hdr); | 2263 | encode_putfh(xdr, args->fh, &hdr); |
2254 | encode_open(xdr, args, &hdr); | 2264 | encode_open(xdr, args, &hdr); |
2255 | encode_getfattr(xdr, args->bitmask, &hdr); | 2265 | if (args->access) |
2266 | encode_access(xdr, args->access, &hdr); | ||
2267 | encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); | ||
2256 | encode_nops(&hdr); | 2268 | encode_nops(&hdr); |
2257 | } | 2269 | } |
2258 | 2270 | ||
@@ -4095,7 +4107,7 @@ out_overflow: | |||
4095 | return -EIO; | 4107 | return -EIO; |
4096 | } | 4108 | } |
4097 | 4109 | ||
4098 | static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) | 4110 | static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access) |
4099 | { | 4111 | { |
4100 | __be32 *p; | 4112 | __be32 *p; |
4101 | uint32_t supp, acc; | 4113 | uint32_t supp, acc; |
@@ -4109,8 +4121,8 @@ static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) | |||
4109 | goto out_overflow; | 4121 | goto out_overflow; |
4110 | supp = be32_to_cpup(p++); | 4122 | supp = be32_to_cpup(p++); |
4111 | acc = be32_to_cpup(p); | 4123 | acc = be32_to_cpup(p); |
4112 | access->supported = supp; | 4124 | *supported = supp; |
4113 | access->access = acc; | 4125 | *access = acc; |
4114 | return 0; | 4126 | return 0; |
4115 | out_overflow: | 4127 | out_overflow: |
4116 | print_overflow_msg(__func__, xdr); | 4128 | print_overflow_msg(__func__, xdr); |
@@ -5642,7 +5654,8 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr, | |||
5642 | * and places the remaining xdr data in xdr_buf->tail | 5654 | * and places the remaining xdr data in xdr_buf->tail |
5643 | */ | 5655 | */ |
5644 | pdev->mincount = be32_to_cpup(p); | 5656 | pdev->mincount = be32_to_cpup(p); |
5645 | xdr_read_pages(xdr, pdev->mincount); /* include space for the length */ | 5657 | if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount) |
5658 | goto out_overflow; | ||
5646 | 5659 | ||
5647 | /* Parse notification bitmap, verifying that it is zero. */ | 5660 | /* Parse notification bitmap, verifying that it is zero. */ |
5648 | p = xdr_inline_decode(xdr, 4); | 5661 | p = xdr_inline_decode(xdr, 4); |
@@ -5887,7 +5900,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
5887 | status = decode_putfh(xdr); | 5900 | status = decode_putfh(xdr); |
5888 | if (status != 0) | 5901 | if (status != 0) |
5889 | goto out; | 5902 | goto out; |
5890 | status = decode_access(xdr, res); | 5903 | status = decode_access(xdr, &res->supported, &res->access); |
5891 | if (status != 0) | 5904 | if (status != 0) |
5892 | goto out; | 5905 | goto out; |
5893 | decode_getfattr(xdr, res->fattr, res->server); | 5906 | decode_getfattr(xdr, res->fattr, res->server); |
@@ -6228,6 +6241,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
6228 | status = decode_getfh(xdr, &res->fh); | 6241 | status = decode_getfh(xdr, &res->fh); |
6229 | if (status) | 6242 | if (status) |
6230 | goto out; | 6243 | goto out; |
6244 | if (res->access_request) | ||
6245 | decode_access(xdr, &res->access_supported, &res->access_result); | ||
6231 | decode_getfattr(xdr, res->f_attr, res->server); | 6246 | decode_getfattr(xdr, res->f_attr, res->server); |
6232 | out: | 6247 | out: |
6233 | return status; | 6248 | return status; |
@@ -6276,6 +6291,8 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, | |||
6276 | status = decode_open(xdr, res); | 6291 | status = decode_open(xdr, res); |
6277 | if (status) | 6292 | if (status) |
6278 | goto out; | 6293 | goto out; |
6294 | if (res->access_request) | ||
6295 | decode_access(xdr, &res->access_supported, &res->access_result); | ||
6279 | decode_getfattr(xdr, res->f_attr, res->server); | 6296 | decode_getfattr(xdr, res->f_attr, res->server); |
6280 | out: | 6297 | out: |
6281 | return status; | 6298 | return status; |
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index ea6d111b03e9..be731e6b7b9c 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <scsi/osd_ore.h> | 41 | #include <scsi/osd_ore.h> |
42 | 42 | ||
43 | #include "objlayout.h" | 43 | #include "objlayout.h" |
44 | #include "../internal.h" | ||
44 | 45 | ||
45 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | 46 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
46 | 47 | ||
@@ -606,8 +607,14 @@ static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout, | |||
606 | void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 607 | void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
607 | { | 608 | { |
608 | unsigned long stripe_end = 0; | 609 | unsigned long stripe_end = 0; |
610 | u64 wb_size; | ||
609 | 611 | ||
610 | pnfs_generic_pg_init_write(pgio, req); | 612 | if (pgio->pg_dreq == NULL) |
613 | wb_size = i_size_read(pgio->pg_inode) - req_offset(req); | ||
614 | else | ||
615 | wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); | ||
616 | |||
617 | pnfs_generic_pg_init_write(pgio, req, wb_size); | ||
611 | if (unlikely(pgio->pg_lseg == NULL)) | 618 | if (unlikely(pgio->pg_lseg == NULL)) |
612 | return; /* Not pNFS */ | 619 | return; /* Not pNFS */ |
613 | 620 | ||
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 311a79681e2b..e56e846e9d2d 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -102,6 +102,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, | |||
102 | unsigned int offset, unsigned int count) | 102 | unsigned int offset, unsigned int count) |
103 | { | 103 | { |
104 | struct nfs_page *req; | 104 | struct nfs_page *req; |
105 | struct nfs_lock_context *l_ctx; | ||
105 | 106 | ||
106 | /* try to allocate the request struct */ | 107 | /* try to allocate the request struct */ |
107 | req = nfs_page_alloc(); | 108 | req = nfs_page_alloc(); |
@@ -109,11 +110,12 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, | |||
109 | return ERR_PTR(-ENOMEM); | 110 | return ERR_PTR(-ENOMEM); |
110 | 111 | ||
111 | /* get lock context early so we can deal with alloc failures */ | 112 | /* get lock context early so we can deal with alloc failures */ |
112 | req->wb_lock_context = nfs_get_lock_context(ctx); | 113 | l_ctx = nfs_get_lock_context(ctx); |
113 | if (req->wb_lock_context == NULL) { | 114 | if (IS_ERR(l_ctx)) { |
114 | nfs_page_free(req); | 115 | nfs_page_free(req); |
115 | return ERR_PTR(-ENOMEM); | 116 | return ERR_CAST(l_ctx); |
116 | } | 117 | } |
118 | req->wb_lock_context = l_ctx; | ||
117 | 119 | ||
118 | /* Initialize the request struct. Initially, we assume a | 120 | /* Initialize the request struct. Initially, we assume a |
119 | * long write-back delay. This will be adjusted in | 121 | * long write-back delay. This will be adjusted in |
@@ -290,7 +292,9 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, | |||
290 | { | 292 | { |
291 | if (req->wb_context->cred != prev->wb_context->cred) | 293 | if (req->wb_context->cred != prev->wb_context->cred) |
292 | return false; | 294 | return false; |
293 | if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner) | 295 | if (req->wb_lock_context->lockowner.l_owner != prev->wb_lock_context->lockowner.l_owner) |
296 | return false; | ||
297 | if (req->wb_lock_context->lockowner.l_pid != prev->wb_lock_context->lockowner.l_pid) | ||
294 | return false; | 298 | return false; |
295 | if (req->wb_context->state != prev->wb_context->state) | 299 | if (req->wb_context->state != prev->wb_context->state) |
296 | return false; | 300 | return false; |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2e00feacd4be..fe624c91bd00 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include "iostat.h" | 35 | #include "iostat.h" |
36 | 36 | ||
37 | #define NFSDBG_FACILITY NFSDBG_PNFS | 37 | #define NFSDBG_FACILITY NFSDBG_PNFS |
38 | #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) | ||
38 | 39 | ||
39 | /* Locking: | 40 | /* Locking: |
40 | * | 41 | * |
@@ -190,7 +191,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); | |||
190 | 191 | ||
191 | /* Need to hold i_lock if caller does not already hold reference */ | 192 | /* Need to hold i_lock if caller does not already hold reference */ |
192 | void | 193 | void |
193 | get_layout_hdr(struct pnfs_layout_hdr *lo) | 194 | pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo) |
194 | { | 195 | { |
195 | atomic_inc(&lo->plh_refcount); | 196 | atomic_inc(&lo->plh_refcount); |
196 | } | 197 | } |
@@ -199,43 +200,107 @@ static struct pnfs_layout_hdr * | |||
199 | pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) | 200 | pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) |
200 | { | 201 | { |
201 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; | 202 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; |
202 | return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) : | 203 | return ld->alloc_layout_hdr(ino, gfp_flags); |
203 | kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); | ||
204 | } | 204 | } |
205 | 205 | ||
206 | static void | 206 | static void |
207 | pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) | 207 | pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) |
208 | { | 208 | { |
209 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; | 209 | struct nfs_server *server = NFS_SERVER(lo->plh_inode); |
210 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; | ||
211 | |||
212 | if (!list_empty(&lo->plh_layouts)) { | ||
213 | struct nfs_client *clp = server->nfs_client; | ||
214 | |||
215 | spin_lock(&clp->cl_lock); | ||
216 | list_del_init(&lo->plh_layouts); | ||
217 | spin_unlock(&clp->cl_lock); | ||
218 | } | ||
210 | put_rpccred(lo->plh_lc_cred); | 219 | put_rpccred(lo->plh_lc_cred); |
211 | return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); | 220 | return ld->free_layout_hdr(lo); |
212 | } | 221 | } |
213 | 222 | ||
214 | static void | 223 | static void |
215 | destroy_layout_hdr(struct pnfs_layout_hdr *lo) | 224 | pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo) |
216 | { | 225 | { |
226 | struct nfs_inode *nfsi = NFS_I(lo->plh_inode); | ||
217 | dprintk("%s: freeing layout cache %p\n", __func__, lo); | 227 | dprintk("%s: freeing layout cache %p\n", __func__, lo); |
218 | BUG_ON(!list_empty(&lo->plh_layouts)); | 228 | nfsi->layout = NULL; |
219 | NFS_I(lo->plh_inode)->layout = NULL; | 229 | /* Reset MDS Threshold I/O counters */ |
220 | pnfs_free_layout_hdr(lo); | 230 | nfsi->write_io = 0; |
231 | nfsi->read_io = 0; | ||
232 | } | ||
233 | |||
234 | void | ||
235 | pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) | ||
236 | { | ||
237 | struct inode *inode = lo->plh_inode; | ||
238 | |||
239 | if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { | ||
240 | pnfs_detach_layout_hdr(lo); | ||
241 | spin_unlock(&inode->i_lock); | ||
242 | pnfs_free_layout_hdr(lo); | ||
243 | } | ||
244 | } | ||
245 | |||
246 | static int | ||
247 | pnfs_iomode_to_fail_bit(u32 iomode) | ||
248 | { | ||
249 | return iomode == IOMODE_RW ? | ||
250 | NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; | ||
221 | } | 251 | } |
222 | 252 | ||
223 | static void | 253 | static void |
224 | put_layout_hdr_locked(struct pnfs_layout_hdr *lo) | 254 | pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) |
225 | { | 255 | { |
226 | if (atomic_dec_and_test(&lo->plh_refcount)) | 256 | lo->plh_retry_timestamp = jiffies; |
227 | destroy_layout_hdr(lo); | 257 | if (test_and_set_bit(fail_bit, &lo->plh_flags)) |
258 | atomic_inc(&lo->plh_refcount); | ||
228 | } | 259 | } |
229 | 260 | ||
230 | void | 261 | static void |
231 | put_layout_hdr(struct pnfs_layout_hdr *lo) | 262 | pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) |
263 | { | ||
264 | if (test_and_clear_bit(fail_bit, &lo->plh_flags)) | ||
265 | atomic_dec(&lo->plh_refcount); | ||
266 | } | ||
267 | |||
268 | static void | ||
269 | pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) | ||
232 | { | 270 | { |
233 | struct inode *inode = lo->plh_inode; | 271 | struct inode *inode = lo->plh_inode; |
272 | struct pnfs_layout_range range = { | ||
273 | .iomode = iomode, | ||
274 | .offset = 0, | ||
275 | .length = NFS4_MAX_UINT64, | ||
276 | }; | ||
277 | LIST_HEAD(head); | ||
234 | 278 | ||
235 | if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { | 279 | spin_lock(&inode->i_lock); |
236 | destroy_layout_hdr(lo); | 280 | pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); |
237 | spin_unlock(&inode->i_lock); | 281 | pnfs_mark_matching_lsegs_invalid(lo, &head, &range); |
282 | spin_unlock(&inode->i_lock); | ||
283 | pnfs_free_lseg_list(&head); | ||
284 | dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, | ||
285 | iomode == IOMODE_RW ? "RW" : "READ"); | ||
286 | } | ||
287 | |||
288 | static bool | ||
289 | pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode) | ||
290 | { | ||
291 | unsigned long start, end; | ||
292 | int fail_bit = pnfs_iomode_to_fail_bit(iomode); | ||
293 | |||
294 | if (test_bit(fail_bit, &lo->plh_flags) == 0) | ||
295 | return false; | ||
296 | end = jiffies; | ||
297 | start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT; | ||
298 | if (!time_in_range(lo->plh_retry_timestamp, start, end)) { | ||
299 | /* It is time to retry the failed layoutgets */ | ||
300 | pnfs_layout_clear_fail_bit(lo, fail_bit); | ||
301 | return false; | ||
238 | } | 302 | } |
303 | return true; | ||
239 | } | 304 | } |
240 | 305 | ||
241 | static void | 306 | static void |
@@ -249,33 +314,32 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) | |||
249 | lseg->pls_layout = lo; | 314 | lseg->pls_layout = lo; |
250 | } | 315 | } |
251 | 316 | ||
252 | static void free_lseg(struct pnfs_layout_segment *lseg) | 317 | static void pnfs_free_lseg(struct pnfs_layout_segment *lseg) |
253 | { | 318 | { |
254 | struct inode *ino = lseg->pls_layout->plh_inode; | 319 | struct inode *ino = lseg->pls_layout->plh_inode; |
255 | 320 | ||
256 | NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); | 321 | NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); |
257 | /* Matched by get_layout_hdr in pnfs_insert_layout */ | ||
258 | put_layout_hdr(NFS_I(ino)->layout); | ||
259 | } | 322 | } |
260 | 323 | ||
261 | static void | 324 | static void |
262 | put_lseg_common(struct pnfs_layout_segment *lseg) | 325 | pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, |
326 | struct pnfs_layout_segment *lseg) | ||
263 | { | 327 | { |
264 | struct inode *inode = lseg->pls_layout->plh_inode; | 328 | struct inode *inode = lo->plh_inode; |
265 | 329 | ||
266 | WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); | 330 | WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); |
267 | list_del_init(&lseg->pls_list); | 331 | list_del_init(&lseg->pls_list); |
268 | if (list_empty(&lseg->pls_layout->plh_segs)) { | 332 | /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ |
269 | set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); | 333 | atomic_dec(&lo->plh_refcount); |
270 | /* Matched by initial refcount set in alloc_init_layout_hdr */ | 334 | if (list_empty(&lo->plh_segs)) |
271 | put_layout_hdr_locked(lseg->pls_layout); | 335 | clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); |
272 | } | ||
273 | rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); | 336 | rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); |
274 | } | 337 | } |
275 | 338 | ||
276 | void | 339 | void |
277 | put_lseg(struct pnfs_layout_segment *lseg) | 340 | pnfs_put_lseg(struct pnfs_layout_segment *lseg) |
278 | { | 341 | { |
342 | struct pnfs_layout_hdr *lo; | ||
279 | struct inode *inode; | 343 | struct inode *inode; |
280 | 344 | ||
281 | if (!lseg) | 345 | if (!lseg) |
@@ -284,17 +348,17 @@ put_lseg(struct pnfs_layout_segment *lseg) | |||
284 | dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, | 348 | dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, |
285 | atomic_read(&lseg->pls_refcount), | 349 | atomic_read(&lseg->pls_refcount), |
286 | test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); | 350 | test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); |
287 | inode = lseg->pls_layout->plh_inode; | 351 | lo = lseg->pls_layout; |
352 | inode = lo->plh_inode; | ||
288 | if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { | 353 | if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { |
289 | LIST_HEAD(free_me); | 354 | pnfs_get_layout_hdr(lo); |
290 | 355 | pnfs_layout_remove_lseg(lo, lseg); | |
291 | put_lseg_common(lseg); | ||
292 | list_add(&lseg->pls_list, &free_me); | ||
293 | spin_unlock(&inode->i_lock); | 356 | spin_unlock(&inode->i_lock); |
294 | pnfs_free_lseg_list(&free_me); | 357 | pnfs_free_lseg(lseg); |
358 | pnfs_put_layout_hdr(lo); | ||
295 | } | 359 | } |
296 | } | 360 | } |
297 | EXPORT_SYMBOL_GPL(put_lseg); | 361 | EXPORT_SYMBOL_GPL(pnfs_put_lseg); |
298 | 362 | ||
299 | static inline u64 | 363 | static inline u64 |
300 | end_offset(u64 start, u64 len) | 364 | end_offset(u64 start, u64 len) |
@@ -378,7 +442,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, | |||
378 | dprintk("%s: lseg %p ref %d\n", __func__, lseg, | 442 | dprintk("%s: lseg %p ref %d\n", __func__, lseg, |
379 | atomic_read(&lseg->pls_refcount)); | 443 | atomic_read(&lseg->pls_refcount)); |
380 | if (atomic_dec_and_test(&lseg->pls_refcount)) { | 444 | if (atomic_dec_and_test(&lseg->pls_refcount)) { |
381 | put_lseg_common(lseg); | 445 | pnfs_layout_remove_lseg(lseg->pls_layout, lseg); |
382 | list_add(&lseg->pls_list, tmp_list); | 446 | list_add(&lseg->pls_list, tmp_list); |
383 | rv = 1; | 447 | rv = 1; |
384 | } | 448 | } |
@@ -390,7 +454,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, | |||
390 | * after call. | 454 | * after call. |
391 | */ | 455 | */ |
392 | int | 456 | int |
393 | mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | 457 | pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, |
394 | struct list_head *tmp_list, | 458 | struct list_head *tmp_list, |
395 | struct pnfs_layout_range *recall_range) | 459 | struct pnfs_layout_range *recall_range) |
396 | { | 460 | { |
@@ -399,14 +463,8 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | |||
399 | 463 | ||
400 | dprintk("%s:Begin lo %p\n", __func__, lo); | 464 | dprintk("%s:Begin lo %p\n", __func__, lo); |
401 | 465 | ||
402 | if (list_empty(&lo->plh_segs)) { | 466 | if (list_empty(&lo->plh_segs)) |
403 | /* Reset MDS Threshold I/O counters */ | ||
404 | NFS_I(lo->plh_inode)->write_io = 0; | ||
405 | NFS_I(lo->plh_inode)->read_io = 0; | ||
406 | if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) | ||
407 | put_layout_hdr_locked(lo); | ||
408 | return 0; | 467 | return 0; |
409 | } | ||
410 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) | 468 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) |
411 | if (!recall_range || | 469 | if (!recall_range || |
412 | should_free_lseg(&lseg->pls_range, recall_range)) { | 470 | should_free_lseg(&lseg->pls_range, recall_range)) { |
@@ -426,25 +484,13 @@ void | |||
426 | pnfs_free_lseg_list(struct list_head *free_me) | 484 | pnfs_free_lseg_list(struct list_head *free_me) |
427 | { | 485 | { |
428 | struct pnfs_layout_segment *lseg, *tmp; | 486 | struct pnfs_layout_segment *lseg, *tmp; |
429 | struct pnfs_layout_hdr *lo; | ||
430 | 487 | ||
431 | if (list_empty(free_me)) | 488 | if (list_empty(free_me)) |
432 | return; | 489 | return; |
433 | 490 | ||
434 | lo = list_first_entry(free_me, struct pnfs_layout_segment, | ||
435 | pls_list)->pls_layout; | ||
436 | |||
437 | if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) { | ||
438 | struct nfs_client *clp; | ||
439 | |||
440 | clp = NFS_SERVER(lo->plh_inode)->nfs_client; | ||
441 | spin_lock(&clp->cl_lock); | ||
442 | list_del_init(&lo->plh_layouts); | ||
443 | spin_unlock(&clp->cl_lock); | ||
444 | } | ||
445 | list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { | 491 | list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { |
446 | list_del(&lseg->pls_list); | 492 | list_del(&lseg->pls_list); |
447 | free_lseg(lseg); | 493 | pnfs_free_lseg(lseg); |
448 | } | 494 | } |
449 | } | 495 | } |
450 | 496 | ||
@@ -458,10 +504,15 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) | |||
458 | lo = nfsi->layout; | 504 | lo = nfsi->layout; |
459 | if (lo) { | 505 | if (lo) { |
460 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ | 506 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ |
461 | mark_matching_lsegs_invalid(lo, &tmp_list, NULL); | 507 | pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); |
462 | } | 508 | pnfs_get_layout_hdr(lo); |
463 | spin_unlock(&nfsi->vfs_inode.i_lock); | 509 | pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); |
464 | pnfs_free_lseg_list(&tmp_list); | 510 | pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); |
511 | spin_unlock(&nfsi->vfs_inode.i_lock); | ||
512 | pnfs_free_lseg_list(&tmp_list); | ||
513 | pnfs_put_layout_hdr(lo); | ||
514 | } else | ||
515 | spin_unlock(&nfsi->vfs_inode.i_lock); | ||
465 | } | 516 | } |
466 | EXPORT_SYMBOL_GPL(pnfs_destroy_layout); | 517 | EXPORT_SYMBOL_GPL(pnfs_destroy_layout); |
467 | 518 | ||
@@ -498,46 +549,54 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) | |||
498 | } | 549 | } |
499 | } | 550 | } |
500 | 551 | ||
552 | /* | ||
553 | * Compare 2 layout stateid sequence ids, to see which is newer, | ||
554 | * taking into account wraparound issues. | ||
555 | */ | ||
556 | static bool pnfs_seqid_is_newer(u32 s1, u32 s2) | ||
557 | { | ||
558 | return (s32)s1 - (s32)s2 > 0; | ||
559 | } | ||
560 | |||
501 | /* update lo->plh_stateid with new if is more recent */ | 561 | /* update lo->plh_stateid with new if is more recent */ |
502 | void | 562 | void |
503 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, | 563 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, |
504 | bool update_barrier) | 564 | bool update_barrier) |
505 | { | 565 | { |
506 | u32 oldseq, newseq; | 566 | u32 oldseq, newseq, new_barrier; |
567 | int empty = list_empty(&lo->plh_segs); | ||
507 | 568 | ||
508 | oldseq = be32_to_cpu(lo->plh_stateid.seqid); | 569 | oldseq = be32_to_cpu(lo->plh_stateid.seqid); |
509 | newseq = be32_to_cpu(new->seqid); | 570 | newseq = be32_to_cpu(new->seqid); |
510 | if ((int)(newseq - oldseq) > 0) { | 571 | if (empty || pnfs_seqid_is_newer(newseq, oldseq)) { |
511 | nfs4_stateid_copy(&lo->plh_stateid, new); | 572 | nfs4_stateid_copy(&lo->plh_stateid, new); |
512 | if (update_barrier) { | 573 | if (update_barrier) { |
513 | u32 new_barrier = be32_to_cpu(new->seqid); | 574 | new_barrier = be32_to_cpu(new->seqid); |
514 | |||
515 | if ((int)(new_barrier - lo->plh_barrier)) | ||
516 | lo->plh_barrier = new_barrier; | ||
517 | } else { | 575 | } else { |
518 | /* Because of wraparound, we want to keep the barrier | 576 | /* Because of wraparound, we want to keep the barrier |
519 | * "close" to the current seqids. It needs to be | 577 | * "close" to the current seqids. |
520 | * within 2**31 to count as "behind", so if it | ||
521 | * gets too near that limit, give us a litle leeway | ||
522 | * and bring it to within 2**30. | ||
523 | * NOTE - and yes, this is all unsigned arithmetic. | ||
524 | */ | 578 | */ |
525 | if (unlikely((newseq - lo->plh_barrier) > (3 << 29))) | 579 | new_barrier = newseq - atomic_read(&lo->plh_outstanding); |
526 | lo->plh_barrier = newseq - (1 << 30); | ||
527 | } | 580 | } |
581 | if (empty || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) | ||
582 | lo->plh_barrier = new_barrier; | ||
528 | } | 583 | } |
529 | } | 584 | } |
530 | 585 | ||
586 | static bool | ||
587 | pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, | ||
588 | const nfs4_stateid *stateid) | ||
589 | { | ||
590 | u32 seqid = be32_to_cpu(stateid->seqid); | ||
591 | |||
592 | return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); | ||
593 | } | ||
594 | |||
531 | /* lget is set to 1 if called from inside send_layoutget call chain */ | 595 | /* lget is set to 1 if called from inside send_layoutget call chain */ |
532 | static bool | 596 | static bool |
533 | pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, | 597 | pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, int lget) |
534 | int lget) | ||
535 | { | 598 | { |
536 | if ((stateid) && | ||
537 | (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0) | ||
538 | return true; | ||
539 | return lo->plh_block_lgets || | 599 | return lo->plh_block_lgets || |
540 | test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) || | ||
541 | test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || | 600 | test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || |
542 | (list_empty(&lo->plh_segs) && | 601 | (list_empty(&lo->plh_segs) && |
543 | (atomic_read(&lo->plh_outstanding) > lget)); | 602 | (atomic_read(&lo->plh_outstanding) > lget)); |
@@ -551,7 +610,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | |||
551 | 610 | ||
552 | dprintk("--> %s\n", __func__); | 611 | dprintk("--> %s\n", __func__); |
553 | spin_lock(&lo->plh_inode->i_lock); | 612 | spin_lock(&lo->plh_inode->i_lock); |
554 | if (pnfs_layoutgets_blocked(lo, NULL, 1)) { | 613 | if (pnfs_layoutgets_blocked(lo, 1)) { |
555 | status = -EAGAIN; | 614 | status = -EAGAIN; |
556 | } else if (list_empty(&lo->plh_segs)) { | 615 | } else if (list_empty(&lo->plh_segs)) { |
557 | int seq; | 616 | int seq; |
@@ -582,7 +641,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
582 | struct inode *ino = lo->plh_inode; | 641 | struct inode *ino = lo->plh_inode; |
583 | struct nfs_server *server = NFS_SERVER(ino); | 642 | struct nfs_server *server = NFS_SERVER(ino); |
584 | struct nfs4_layoutget *lgp; | 643 | struct nfs4_layoutget *lgp; |
585 | struct pnfs_layout_segment *lseg = NULL; | 644 | struct pnfs_layout_segment *lseg; |
586 | 645 | ||
587 | dprintk("--> %s\n", __func__); | 646 | dprintk("--> %s\n", __func__); |
588 | 647 | ||
@@ -599,16 +658,22 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
599 | lgp->args.type = server->pnfs_curr_ld->id; | 658 | lgp->args.type = server->pnfs_curr_ld->id; |
600 | lgp->args.inode = ino; | 659 | lgp->args.inode = ino; |
601 | lgp->args.ctx = get_nfs_open_context(ctx); | 660 | lgp->args.ctx = get_nfs_open_context(ctx); |
602 | lgp->lsegpp = &lseg; | ||
603 | lgp->gfp_flags = gfp_flags; | 661 | lgp->gfp_flags = gfp_flags; |
604 | 662 | ||
605 | /* Synchronously retrieve layout information from server and | 663 | /* Synchronously retrieve layout information from server and |
606 | * store in lseg. | 664 | * store in lseg. |
607 | */ | 665 | */ |
608 | nfs4_proc_layoutget(lgp, gfp_flags); | 666 | lseg = nfs4_proc_layoutget(lgp, gfp_flags); |
609 | if (!lseg) { | 667 | if (IS_ERR(lseg)) { |
610 | /* remember that LAYOUTGET failed and suspend trying */ | 668 | switch (PTR_ERR(lseg)) { |
611 | set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); | 669 | case -ENOMEM: |
670 | case -ERESTARTSYS: | ||
671 | break; | ||
672 | default: | ||
673 | /* remember that LAYOUTGET failed and suspend trying */ | ||
674 | pnfs_layout_io_set_failed(lo, range->iomode); | ||
675 | } | ||
676 | return NULL; | ||
612 | } | 677 | } |
613 | 678 | ||
614 | return lseg; | 679 | return lseg; |
@@ -636,25 +701,24 @@ _pnfs_return_layout(struct inode *ino) | |||
636 | 701 | ||
637 | spin_lock(&ino->i_lock); | 702 | spin_lock(&ino->i_lock); |
638 | lo = nfsi->layout; | 703 | lo = nfsi->layout; |
639 | if (!lo || pnfs_test_layout_returned(lo)) { | 704 | if (!lo) { |
640 | spin_unlock(&ino->i_lock); | 705 | spin_unlock(&ino->i_lock); |
641 | dprintk("NFS: %s no layout to return\n", __func__); | 706 | dprintk("NFS: %s no layout to return\n", __func__); |
642 | goto out; | 707 | goto out; |
643 | } | 708 | } |
644 | stateid = nfsi->layout->plh_stateid; | 709 | stateid = nfsi->layout->plh_stateid; |
645 | /* Reference matched in nfs4_layoutreturn_release */ | 710 | /* Reference matched in nfs4_layoutreturn_release */ |
646 | get_layout_hdr(lo); | 711 | pnfs_get_layout_hdr(lo); |
647 | empty = list_empty(&lo->plh_segs); | 712 | empty = list_empty(&lo->plh_segs); |
648 | mark_matching_lsegs_invalid(lo, &tmp_list, NULL); | 713 | pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); |
649 | /* Don't send a LAYOUTRETURN if list was initially empty */ | 714 | /* Don't send a LAYOUTRETURN if list was initially empty */ |
650 | if (empty) { | 715 | if (empty) { |
651 | spin_unlock(&ino->i_lock); | 716 | spin_unlock(&ino->i_lock); |
652 | put_layout_hdr(lo); | 717 | pnfs_put_layout_hdr(lo); |
653 | dprintk("NFS: %s no layout segments to return\n", __func__); | 718 | dprintk("NFS: %s no layout segments to return\n", __func__); |
654 | goto out; | 719 | goto out; |
655 | } | 720 | } |
656 | lo->plh_block_lgets++; | 721 | lo->plh_block_lgets++; |
657 | pnfs_mark_layout_returned(lo); | ||
658 | spin_unlock(&ino->i_lock); | 722 | spin_unlock(&ino->i_lock); |
659 | pnfs_free_lseg_list(&tmp_list); | 723 | pnfs_free_lseg_list(&tmp_list); |
660 | 724 | ||
@@ -663,10 +727,10 @@ _pnfs_return_layout(struct inode *ino) | |||
663 | lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); | 727 | lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); |
664 | if (unlikely(lrp == NULL)) { | 728 | if (unlikely(lrp == NULL)) { |
665 | status = -ENOMEM; | 729 | status = -ENOMEM; |
666 | set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); | 730 | spin_lock(&ino->i_lock); |
667 | set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); | 731 | lo->plh_block_lgets--; |
668 | pnfs_clear_layout_returned(lo); | 732 | spin_unlock(&ino->i_lock); |
669 | put_layout_hdr(lo); | 733 | pnfs_put_layout_hdr(lo); |
670 | goto out; | 734 | goto out; |
671 | } | 735 | } |
672 | 736 | ||
@@ -703,7 +767,7 @@ bool pnfs_roc(struct inode *ino) | |||
703 | if (!found) | 767 | if (!found) |
704 | goto out_nolayout; | 768 | goto out_nolayout; |
705 | lo->plh_block_lgets++; | 769 | lo->plh_block_lgets++; |
706 | get_layout_hdr(lo); /* matched in pnfs_roc_release */ | 770 | pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */ |
707 | spin_unlock(&ino->i_lock); | 771 | spin_unlock(&ino->i_lock); |
708 | pnfs_free_lseg_list(&tmp_list); | 772 | pnfs_free_lseg_list(&tmp_list); |
709 | return true; | 773 | return true; |
@@ -720,8 +784,12 @@ void pnfs_roc_release(struct inode *ino) | |||
720 | spin_lock(&ino->i_lock); | 784 | spin_lock(&ino->i_lock); |
721 | lo = NFS_I(ino)->layout; | 785 | lo = NFS_I(ino)->layout; |
722 | lo->plh_block_lgets--; | 786 | lo->plh_block_lgets--; |
723 | put_layout_hdr_locked(lo); | 787 | if (atomic_dec_and_test(&lo->plh_refcount)) { |
724 | spin_unlock(&ino->i_lock); | 788 | pnfs_detach_layout_hdr(lo); |
789 | spin_unlock(&ino->i_lock); | ||
790 | pnfs_free_layout_hdr(lo); | ||
791 | } else | ||
792 | spin_unlock(&ino->i_lock); | ||
725 | } | 793 | } |
726 | 794 | ||
727 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) | 795 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) |
@@ -730,32 +798,34 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) | |||
730 | 798 | ||
731 | spin_lock(&ino->i_lock); | 799 | spin_lock(&ino->i_lock); |
732 | lo = NFS_I(ino)->layout; | 800 | lo = NFS_I(ino)->layout; |
733 | if ((int)(barrier - lo->plh_barrier) > 0) | 801 | if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) |
734 | lo->plh_barrier = barrier; | 802 | lo->plh_barrier = barrier; |
735 | spin_unlock(&ino->i_lock); | 803 | spin_unlock(&ino->i_lock); |
736 | } | 804 | } |
737 | 805 | ||
738 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier) | 806 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) |
739 | { | 807 | { |
740 | struct nfs_inode *nfsi = NFS_I(ino); | 808 | struct nfs_inode *nfsi = NFS_I(ino); |
809 | struct pnfs_layout_hdr *lo; | ||
741 | struct pnfs_layout_segment *lseg; | 810 | struct pnfs_layout_segment *lseg; |
811 | u32 current_seqid; | ||
742 | bool found = false; | 812 | bool found = false; |
743 | 813 | ||
744 | spin_lock(&ino->i_lock); | 814 | spin_lock(&ino->i_lock); |
745 | list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) | 815 | list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) |
746 | if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { | 816 | if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { |
817 | rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); | ||
747 | found = true; | 818 | found = true; |
748 | break; | 819 | goto out; |
749 | } | 820 | } |
750 | if (!found) { | 821 | lo = nfsi->layout; |
751 | struct pnfs_layout_hdr *lo = nfsi->layout; | 822 | current_seqid = be32_to_cpu(lo->plh_stateid.seqid); |
752 | u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid); | ||
753 | 823 | ||
754 | /* Since close does not return a layout stateid for use as | 824 | /* Since close does not return a layout stateid for use as |
755 | * a barrier, we choose the worst-case barrier. | 825 | * a barrier, we choose the worst-case barrier. |
756 | */ | 826 | */ |
757 | *barrier = current_seqid + atomic_read(&lo->plh_outstanding); | 827 | *barrier = current_seqid + atomic_read(&lo->plh_outstanding); |
758 | } | 828 | out: |
759 | spin_unlock(&ino->i_lock); | 829 | spin_unlock(&ino->i_lock); |
760 | return found; | 830 | return found; |
761 | } | 831 | } |
@@ -786,14 +856,13 @@ cmp_layout(struct pnfs_layout_range *l1, | |||
786 | } | 856 | } |
787 | 857 | ||
788 | static void | 858 | static void |
789 | pnfs_insert_layout(struct pnfs_layout_hdr *lo, | 859 | pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo, |
790 | struct pnfs_layout_segment *lseg) | 860 | struct pnfs_layout_segment *lseg) |
791 | { | 861 | { |
792 | struct pnfs_layout_segment *lp; | 862 | struct pnfs_layout_segment *lp; |
793 | 863 | ||
794 | dprintk("%s:Begin\n", __func__); | 864 | dprintk("%s:Begin\n", __func__); |
795 | 865 | ||
796 | assert_spin_locked(&lo->plh_inode->i_lock); | ||
797 | list_for_each_entry(lp, &lo->plh_segs, pls_list) { | 866 | list_for_each_entry(lp, &lo->plh_segs, pls_list) { |
798 | if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) | 867 | if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) |
799 | continue; | 868 | continue; |
@@ -813,7 +882,7 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, | |||
813 | __func__, lseg, lseg->pls_range.iomode, | 882 | __func__, lseg, lseg->pls_range.iomode, |
814 | lseg->pls_range.offset, lseg->pls_range.length); | 883 | lseg->pls_range.offset, lseg->pls_range.length); |
815 | out: | 884 | out: |
816 | get_layout_hdr(lo); | 885 | pnfs_get_layout_hdr(lo); |
817 | 886 | ||
818 | dprintk("%s:Return\n", __func__); | 887 | dprintk("%s:Return\n", __func__); |
819 | } | 888 | } |
@@ -847,21 +916,19 @@ pnfs_find_alloc_layout(struct inode *ino, | |||
847 | 916 | ||
848 | dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); | 917 | dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); |
849 | 918 | ||
850 | assert_spin_locked(&ino->i_lock); | 919 | if (nfsi->layout != NULL) |
851 | if (nfsi->layout) { | 920 | goto out_existing; |
852 | if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags)) | ||
853 | return NULL; | ||
854 | else | ||
855 | return nfsi->layout; | ||
856 | } | ||
857 | spin_unlock(&ino->i_lock); | 921 | spin_unlock(&ino->i_lock); |
858 | new = alloc_init_layout_hdr(ino, ctx, gfp_flags); | 922 | new = alloc_init_layout_hdr(ino, ctx, gfp_flags); |
859 | spin_lock(&ino->i_lock); | 923 | spin_lock(&ino->i_lock); |
860 | 924 | ||
861 | if (likely(nfsi->layout == NULL)) /* Won the race? */ | 925 | if (likely(nfsi->layout == NULL)) { /* Won the race? */ |
862 | nfsi->layout = new; | 926 | nfsi->layout = new; |
863 | else | 927 | return new; |
864 | pnfs_free_layout_hdr(new); | 928 | } |
929 | pnfs_free_layout_hdr(new); | ||
930 | out_existing: | ||
931 | pnfs_get_layout_hdr(nfsi->layout); | ||
865 | return nfsi->layout; | 932 | return nfsi->layout; |
866 | } | 933 | } |
867 | 934 | ||
@@ -904,11 +971,10 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, | |||
904 | 971 | ||
905 | dprintk("%s:Begin\n", __func__); | 972 | dprintk("%s:Begin\n", __func__); |
906 | 973 | ||
907 | assert_spin_locked(&lo->plh_inode->i_lock); | ||
908 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { | 974 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { |
909 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && | 975 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && |
910 | is_matching_lseg(&lseg->pls_range, range)) { | 976 | is_matching_lseg(&lseg->pls_range, range)) { |
911 | ret = get_lseg(lseg); | 977 | ret = pnfs_get_lseg(lseg); |
912 | break; | 978 | break; |
913 | } | 979 | } |
914 | if (lseg->pls_range.offset > range->offset) | 980 | if (lseg->pls_range.offset > range->offset) |
@@ -1013,7 +1079,6 @@ pnfs_update_layout(struct inode *ino, | |||
1013 | .length = count, | 1079 | .length = count, |
1014 | }; | 1080 | }; |
1015 | unsigned pg_offset; | 1081 | unsigned pg_offset; |
1016 | struct nfs_inode *nfsi = NFS_I(ino); | ||
1017 | struct nfs_server *server = NFS_SERVER(ino); | 1082 | struct nfs_server *server = NFS_SERVER(ino); |
1018 | struct nfs_client *clp = server->nfs_client; | 1083 | struct nfs_client *clp = server->nfs_client; |
1019 | struct pnfs_layout_hdr *lo; | 1084 | struct pnfs_layout_hdr *lo; |
@@ -1021,16 +1086,16 @@ pnfs_update_layout(struct inode *ino, | |||
1021 | bool first = false; | 1086 | bool first = false; |
1022 | 1087 | ||
1023 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) | 1088 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) |
1024 | return NULL; | 1089 | goto out; |
1025 | 1090 | ||
1026 | if (pnfs_within_mdsthreshold(ctx, ino, iomode)) | 1091 | if (pnfs_within_mdsthreshold(ctx, ino, iomode)) |
1027 | return NULL; | 1092 | goto out; |
1028 | 1093 | ||
1029 | spin_lock(&ino->i_lock); | 1094 | spin_lock(&ino->i_lock); |
1030 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); | 1095 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); |
1031 | if (lo == NULL) { | 1096 | if (lo == NULL) { |
1032 | dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); | 1097 | spin_unlock(&ino->i_lock); |
1033 | goto out_unlock; | 1098 | goto out; |
1034 | } | 1099 | } |
1035 | 1100 | ||
1036 | /* Do we even need to bother with this? */ | 1101 | /* Do we even need to bother with this? */ |
@@ -1040,7 +1105,7 @@ pnfs_update_layout(struct inode *ino, | |||
1040 | } | 1105 | } |
1041 | 1106 | ||
1042 | /* if LAYOUTGET already failed once we don't try again */ | 1107 | /* if LAYOUTGET already failed once we don't try again */ |
1043 | if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) | 1108 | if (pnfs_layout_io_test_failed(lo, iomode)) |
1044 | goto out_unlock; | 1109 | goto out_unlock; |
1045 | 1110 | ||
1046 | /* Check to see if the layout for the given range already exists */ | 1111 | /* Check to see if the layout for the given range already exists */ |
@@ -1048,17 +1113,13 @@ pnfs_update_layout(struct inode *ino, | |||
1048 | if (lseg) | 1113 | if (lseg) |
1049 | goto out_unlock; | 1114 | goto out_unlock; |
1050 | 1115 | ||
1051 | if (pnfs_layoutgets_blocked(lo, NULL, 0)) | 1116 | if (pnfs_layoutgets_blocked(lo, 0)) |
1052 | goto out_unlock; | 1117 | goto out_unlock; |
1053 | atomic_inc(&lo->plh_outstanding); | 1118 | atomic_inc(&lo->plh_outstanding); |
1054 | 1119 | ||
1055 | get_layout_hdr(lo); | ||
1056 | if (list_empty(&lo->plh_segs)) | 1120 | if (list_empty(&lo->plh_segs)) |
1057 | first = true; | 1121 | first = true; |
1058 | 1122 | ||
1059 | /* Enable LAYOUTRETURNs */ | ||
1060 | pnfs_clear_layout_returned(lo); | ||
1061 | |||
1062 | spin_unlock(&ino->i_lock); | 1123 | spin_unlock(&ino->i_lock); |
1063 | if (first) { | 1124 | if (first) { |
1064 | /* The lo must be on the clp list if there is any | 1125 | /* The lo must be on the clp list if there is any |
@@ -1079,24 +1140,26 @@ pnfs_update_layout(struct inode *ino, | |||
1079 | arg.length = PAGE_CACHE_ALIGN(arg.length); | 1140 | arg.length = PAGE_CACHE_ALIGN(arg.length); |
1080 | 1141 | ||
1081 | lseg = send_layoutget(lo, ctx, &arg, gfp_flags); | 1142 | lseg = send_layoutget(lo, ctx, &arg, gfp_flags); |
1082 | if (!lseg && first) { | ||
1083 | spin_lock(&clp->cl_lock); | ||
1084 | list_del_init(&lo->plh_layouts); | ||
1085 | spin_unlock(&clp->cl_lock); | ||
1086 | } | ||
1087 | atomic_dec(&lo->plh_outstanding); | 1143 | atomic_dec(&lo->plh_outstanding); |
1088 | put_layout_hdr(lo); | 1144 | out_put_layout_hdr: |
1145 | pnfs_put_layout_hdr(lo); | ||
1089 | out: | 1146 | out: |
1090 | dprintk("%s end, state 0x%lx lseg %p\n", __func__, | 1147 | dprintk("%s: inode %s/%llu pNFS layout segment %s for " |
1091 | nfsi->layout ? nfsi->layout->plh_flags : -1, lseg); | 1148 | "(%s, offset: %llu, length: %llu)\n", |
1149 | __func__, ino->i_sb->s_id, | ||
1150 | (unsigned long long)NFS_FILEID(ino), | ||
1151 | lseg == NULL ? "not found" : "found", | ||
1152 | iomode==IOMODE_RW ? "read/write" : "read-only", | ||
1153 | (unsigned long long)pos, | ||
1154 | (unsigned long long)count); | ||
1092 | return lseg; | 1155 | return lseg; |
1093 | out_unlock: | 1156 | out_unlock: |
1094 | spin_unlock(&ino->i_lock); | 1157 | spin_unlock(&ino->i_lock); |
1095 | goto out; | 1158 | goto out_put_layout_hdr; |
1096 | } | 1159 | } |
1097 | EXPORT_SYMBOL_GPL(pnfs_update_layout); | 1160 | EXPORT_SYMBOL_GPL(pnfs_update_layout); |
1098 | 1161 | ||
1099 | int | 1162 | struct pnfs_layout_segment * |
1100 | pnfs_layout_process(struct nfs4_layoutget *lgp) | 1163 | pnfs_layout_process(struct nfs4_layoutget *lgp) |
1101 | { | 1164 | { |
1102 | struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; | 1165 | struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; |
@@ -1123,25 +1186,29 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
1123 | goto out_forget_reply; | 1186 | goto out_forget_reply; |
1124 | } | 1187 | } |
1125 | 1188 | ||
1126 | if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) { | 1189 | if (pnfs_layoutgets_blocked(lo, 1) || |
1190 | pnfs_layout_stateid_blocked(lo, &res->stateid)) { | ||
1127 | dprintk("%s forget reply due to state\n", __func__); | 1191 | dprintk("%s forget reply due to state\n", __func__); |
1128 | goto out_forget_reply; | 1192 | goto out_forget_reply; |
1129 | } | 1193 | } |
1194 | |||
1195 | /* Done processing layoutget. Set the layout stateid */ | ||
1196 | pnfs_set_layout_stateid(lo, &res->stateid, false); | ||
1197 | |||
1130 | init_lseg(lo, lseg); | 1198 | init_lseg(lo, lseg); |
1131 | lseg->pls_range = res->range; | 1199 | lseg->pls_range = res->range; |
1132 | *lgp->lsegpp = get_lseg(lseg); | 1200 | pnfs_get_lseg(lseg); |
1133 | pnfs_insert_layout(lo, lseg); | 1201 | pnfs_layout_insert_lseg(lo, lseg); |
1134 | 1202 | ||
1135 | if (res->return_on_close) { | 1203 | if (res->return_on_close) { |
1136 | set_bit(NFS_LSEG_ROC, &lseg->pls_flags); | 1204 | set_bit(NFS_LSEG_ROC, &lseg->pls_flags); |
1137 | set_bit(NFS_LAYOUT_ROC, &lo->plh_flags); | 1205 | set_bit(NFS_LAYOUT_ROC, &lo->plh_flags); |
1138 | } | 1206 | } |
1139 | 1207 | ||
1140 | /* Done processing layoutget. Set the layout stateid */ | ||
1141 | pnfs_set_layout_stateid(lo, &res->stateid, false); | ||
1142 | spin_unlock(&ino->i_lock); | 1208 | spin_unlock(&ino->i_lock); |
1209 | return lseg; | ||
1143 | out: | 1210 | out: |
1144 | return status; | 1211 | return ERR_PTR(status); |
1145 | 1212 | ||
1146 | out_forget_reply: | 1213 | out_forget_reply: |
1147 | spin_unlock(&ino->i_lock); | 1214 | spin_unlock(&ino->i_lock); |
@@ -1153,16 +1220,24 @@ out_forget_reply: | |||
1153 | void | 1220 | void |
1154 | pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 1221 | pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
1155 | { | 1222 | { |
1223 | u64 rd_size = req->wb_bytes; | ||
1224 | |||
1156 | BUG_ON(pgio->pg_lseg != NULL); | 1225 | BUG_ON(pgio->pg_lseg != NULL); |
1157 | 1226 | ||
1158 | if (req->wb_offset != req->wb_pgbase) { | 1227 | if (req->wb_offset != req->wb_pgbase) { |
1159 | nfs_pageio_reset_read_mds(pgio); | 1228 | nfs_pageio_reset_read_mds(pgio); |
1160 | return; | 1229 | return; |
1161 | } | 1230 | } |
1231 | |||
1232 | if (pgio->pg_dreq == NULL) | ||
1233 | rd_size = i_size_read(pgio->pg_inode) - req_offset(req); | ||
1234 | else | ||
1235 | rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); | ||
1236 | |||
1162 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 1237 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
1163 | req->wb_context, | 1238 | req->wb_context, |
1164 | req_offset(req), | 1239 | req_offset(req), |
1165 | req->wb_bytes, | 1240 | rd_size, |
1166 | IOMODE_READ, | 1241 | IOMODE_READ, |
1167 | GFP_KERNEL); | 1242 | GFP_KERNEL); |
1168 | /* If no lseg, fall back to read through mds */ | 1243 | /* If no lseg, fall back to read through mds */ |
@@ -1173,7 +1248,8 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r | |||
1173 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); | 1248 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); |
1174 | 1249 | ||
1175 | void | 1250 | void |
1176 | pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 1251 | pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, |
1252 | struct nfs_page *req, u64 wb_size) | ||
1177 | { | 1253 | { |
1178 | BUG_ON(pgio->pg_lseg != NULL); | 1254 | BUG_ON(pgio->pg_lseg != NULL); |
1179 | 1255 | ||
@@ -1181,10 +1257,11 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * | |||
1181 | nfs_pageio_reset_write_mds(pgio); | 1257 | nfs_pageio_reset_write_mds(pgio); |
1182 | return; | 1258 | return; |
1183 | } | 1259 | } |
1260 | |||
1184 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 1261 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
1185 | req->wb_context, | 1262 | req->wb_context, |
1186 | req_offset(req), | 1263 | req_offset(req), |
1187 | req->wb_bytes, | 1264 | wb_size, |
1188 | IOMODE_RW, | 1265 | IOMODE_RW, |
1189 | GFP_NOFS); | 1266 | GFP_NOFS); |
1190 | /* If no lseg, fall back to write through mds */ | 1267 | /* If no lseg, fall back to write through mds */ |
@@ -1362,12 +1439,12 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he | |||
1362 | if (trypnfs == PNFS_NOT_ATTEMPTED) | 1439 | if (trypnfs == PNFS_NOT_ATTEMPTED) |
1363 | pnfs_write_through_mds(desc, data); | 1440 | pnfs_write_through_mds(desc, data); |
1364 | } | 1441 | } |
1365 | put_lseg(lseg); | 1442 | pnfs_put_lseg(lseg); |
1366 | } | 1443 | } |
1367 | 1444 | ||
1368 | static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) | 1445 | static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) |
1369 | { | 1446 | { |
1370 | put_lseg(hdr->lseg); | 1447 | pnfs_put_lseg(hdr->lseg); |
1371 | nfs_writehdr_free(hdr); | 1448 | nfs_writehdr_free(hdr); |
1372 | } | 1449 | } |
1373 | EXPORT_SYMBOL_GPL(pnfs_writehdr_free); | 1450 | EXPORT_SYMBOL_GPL(pnfs_writehdr_free); |
@@ -1382,17 +1459,17 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | |||
1382 | whdr = nfs_writehdr_alloc(); | 1459 | whdr = nfs_writehdr_alloc(); |
1383 | if (!whdr) { | 1460 | if (!whdr) { |
1384 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | 1461 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); |
1385 | put_lseg(desc->pg_lseg); | 1462 | pnfs_put_lseg(desc->pg_lseg); |
1386 | desc->pg_lseg = NULL; | 1463 | desc->pg_lseg = NULL; |
1387 | return -ENOMEM; | 1464 | return -ENOMEM; |
1388 | } | 1465 | } |
1389 | hdr = &whdr->header; | 1466 | hdr = &whdr->header; |
1390 | nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); | 1467 | nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); |
1391 | hdr->lseg = get_lseg(desc->pg_lseg); | 1468 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); |
1392 | atomic_inc(&hdr->refcnt); | 1469 | atomic_inc(&hdr->refcnt); |
1393 | ret = nfs_generic_flush(desc, hdr); | 1470 | ret = nfs_generic_flush(desc, hdr); |
1394 | if (ret != 0) { | 1471 | if (ret != 0) { |
1395 | put_lseg(desc->pg_lseg); | 1472 | pnfs_put_lseg(desc->pg_lseg); |
1396 | desc->pg_lseg = NULL; | 1473 | desc->pg_lseg = NULL; |
1397 | } else | 1474 | } else |
1398 | pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); | 1475 | pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); |
@@ -1517,12 +1594,12 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea | |||
1517 | if (trypnfs == PNFS_NOT_ATTEMPTED) | 1594 | if (trypnfs == PNFS_NOT_ATTEMPTED) |
1518 | pnfs_read_through_mds(desc, data); | 1595 | pnfs_read_through_mds(desc, data); |
1519 | } | 1596 | } |
1520 | put_lseg(lseg); | 1597 | pnfs_put_lseg(lseg); |
1521 | } | 1598 | } |
1522 | 1599 | ||
1523 | static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) | 1600 | static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) |
1524 | { | 1601 | { |
1525 | put_lseg(hdr->lseg); | 1602 | pnfs_put_lseg(hdr->lseg); |
1526 | nfs_readhdr_free(hdr); | 1603 | nfs_readhdr_free(hdr); |
1527 | } | 1604 | } |
1528 | EXPORT_SYMBOL_GPL(pnfs_readhdr_free); | 1605 | EXPORT_SYMBOL_GPL(pnfs_readhdr_free); |
@@ -1538,17 +1615,17 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | |||
1538 | if (!rhdr) { | 1615 | if (!rhdr) { |
1539 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | 1616 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); |
1540 | ret = -ENOMEM; | 1617 | ret = -ENOMEM; |
1541 | put_lseg(desc->pg_lseg); | 1618 | pnfs_put_lseg(desc->pg_lseg); |
1542 | desc->pg_lseg = NULL; | 1619 | desc->pg_lseg = NULL; |
1543 | return ret; | 1620 | return ret; |
1544 | } | 1621 | } |
1545 | hdr = &rhdr->header; | 1622 | hdr = &rhdr->header; |
1546 | nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); | 1623 | nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); |
1547 | hdr->lseg = get_lseg(desc->pg_lseg); | 1624 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); |
1548 | atomic_inc(&hdr->refcnt); | 1625 | atomic_inc(&hdr->refcnt); |
1549 | ret = nfs_generic_pagein(desc, hdr); | 1626 | ret = nfs_generic_pagein(desc, hdr); |
1550 | if (ret != 0) { | 1627 | if (ret != 0) { |
1551 | put_lseg(desc->pg_lseg); | 1628 | pnfs_put_lseg(desc->pg_lseg); |
1552 | desc->pg_lseg = NULL; | 1629 | desc->pg_lseg = NULL; |
1553 | } else | 1630 | } else |
1554 | pnfs_do_multiple_reads(desc, &hdr->rpc_list); | 1631 | pnfs_do_multiple_reads(desc, &hdr->rpc_list); |
@@ -1574,13 +1651,7 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) | |||
1574 | 1651 | ||
1575 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) | 1652 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) |
1576 | { | 1653 | { |
1577 | if (lseg->pls_range.iomode == IOMODE_RW) { | 1654 | pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); |
1578 | dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); | ||
1579 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); | ||
1580 | } else { | ||
1581 | dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); | ||
1582 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | ||
1583 | } | ||
1584 | } | 1655 | } |
1585 | EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); | 1656 | EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); |
1586 | 1657 | ||
@@ -1601,7 +1672,7 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata) | |||
1601 | } | 1672 | } |
1602 | if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) { | 1673 | if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) { |
1603 | /* references matched in nfs4_layoutcommit_release */ | 1674 | /* references matched in nfs4_layoutcommit_release */ |
1604 | get_lseg(hdr->lseg); | 1675 | pnfs_get_lseg(hdr->lseg); |
1605 | } | 1676 | } |
1606 | if (end_pos > nfsi->layout->plh_lwb) | 1677 | if (end_pos > nfsi->layout->plh_lwb) |
1607 | nfsi->layout->plh_lwb = end_pos; | 1678 | nfsi->layout->plh_lwb = end_pos; |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 745aa1b39e7c..2d722dba1111 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -62,9 +62,6 @@ enum { | |||
62 | NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ | 62 | NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ |
63 | NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ | 63 | NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ |
64 | NFS_LAYOUT_ROC, /* some lseg had roc bit set */ | 64 | NFS_LAYOUT_ROC, /* some lseg had roc bit set */ |
65 | NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ | ||
66 | NFS_LAYOUT_INVALID, /* layout is being destroyed */ | ||
67 | NFS_LAYOUT_RETURNED, /* layout has already been returned */ | ||
68 | }; | 65 | }; |
69 | 66 | ||
70 | enum layoutdriver_policy_flags { | 67 | enum layoutdriver_policy_flags { |
@@ -140,6 +137,7 @@ struct pnfs_layout_hdr { | |||
140 | atomic_t plh_outstanding; /* number of RPCs out */ | 137 | atomic_t plh_outstanding; /* number of RPCs out */ |
141 | unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */ | 138 | unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */ |
142 | u32 plh_barrier; /* ignore lower seqids */ | 139 | u32 plh_barrier; /* ignore lower seqids */ |
140 | unsigned long plh_retry_timestamp; | ||
143 | unsigned long plh_flags; | 141 | unsigned long plh_flags; |
144 | loff_t plh_lwb; /* last write byte for layoutcommit */ | 142 | loff_t plh_lwb; /* last write byte for layoutcommit */ |
145 | struct rpc_cred *plh_lc_cred; /* layoutcommit cred */ | 143 | struct rpc_cred *plh_lc_cred; /* layoutcommit cred */ |
@@ -172,12 +170,12 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, | |||
172 | struct pnfs_devicelist *devlist); | 170 | struct pnfs_devicelist *devlist); |
173 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | 171 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, |
174 | struct pnfs_device *dev); | 172 | struct pnfs_device *dev); |
175 | extern void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); | 173 | extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); |
176 | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); | 174 | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); |
177 | 175 | ||
178 | /* pnfs.c */ | 176 | /* pnfs.c */ |
179 | void get_layout_hdr(struct pnfs_layout_hdr *lo); | 177 | void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); |
180 | void put_lseg(struct pnfs_layout_segment *lseg); | 178 | void pnfs_put_lseg(struct pnfs_layout_segment *lseg); |
181 | 179 | ||
182 | void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, | 180 | void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, |
183 | const struct nfs_pgio_completion_ops *); | 181 | const struct nfs_pgio_completion_ops *); |
@@ -188,28 +186,29 @@ void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); | |||
188 | void unset_pnfs_layoutdriver(struct nfs_server *); | 186 | void unset_pnfs_layoutdriver(struct nfs_server *); |
189 | void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); | 187 | void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); |
190 | int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); | 188 | int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); |
191 | void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); | 189 | void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, |
190 | struct nfs_page *req, u64 wb_size); | ||
192 | int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); | 191 | int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); |
193 | bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); | 192 | bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); |
194 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); | 193 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); |
195 | int pnfs_layout_process(struct nfs4_layoutget *lgp); | 194 | struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); |
196 | void pnfs_free_lseg_list(struct list_head *tmp_list); | 195 | void pnfs_free_lseg_list(struct list_head *tmp_list); |
197 | void pnfs_destroy_layout(struct nfs_inode *); | 196 | void pnfs_destroy_layout(struct nfs_inode *); |
198 | void pnfs_destroy_all_layouts(struct nfs_client *); | 197 | void pnfs_destroy_all_layouts(struct nfs_client *); |
199 | void put_layout_hdr(struct pnfs_layout_hdr *lo); | 198 | void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); |
200 | void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, | 199 | void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, |
201 | const nfs4_stateid *new, | 200 | const nfs4_stateid *new, |
202 | bool update_barrier); | 201 | bool update_barrier); |
203 | int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, | 202 | int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, |
204 | struct pnfs_layout_hdr *lo, | 203 | struct pnfs_layout_hdr *lo, |
205 | struct nfs4_state *open_state); | 204 | struct nfs4_state *open_state); |
206 | int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | 205 | int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, |
207 | struct list_head *tmp_list, | 206 | struct list_head *tmp_list, |
208 | struct pnfs_layout_range *recall_range); | 207 | struct pnfs_layout_range *recall_range); |
209 | bool pnfs_roc(struct inode *ino); | 208 | bool pnfs_roc(struct inode *ino); |
210 | void pnfs_roc_release(struct inode *ino); | 209 | void pnfs_roc_release(struct inode *ino); |
211 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 210 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
212 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier); | 211 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); |
213 | void pnfs_set_layoutcommit(struct nfs_write_data *wdata); | 212 | void pnfs_set_layoutcommit(struct nfs_write_data *wdata); |
214 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); | 213 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); |
215 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | 214 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); |
@@ -233,6 +232,7 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); | |||
233 | /* nfs4_deviceid_flags */ | 232 | /* nfs4_deviceid_flags */ |
234 | enum { | 233 | enum { |
235 | NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ | 234 | NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ |
235 | NFS_DEVICEID_UNAVAILABLE, /* device temporarily unavailable */ | ||
236 | }; | 236 | }; |
237 | 237 | ||
238 | /* pnfs_dev.c */ | 238 | /* pnfs_dev.c */ |
@@ -242,6 +242,7 @@ struct nfs4_deviceid_node { | |||
242 | const struct pnfs_layoutdriver_type *ld; | 242 | const struct pnfs_layoutdriver_type *ld; |
243 | const struct nfs_client *nfs_client; | 243 | const struct nfs_client *nfs_client; |
244 | unsigned long flags; | 244 | unsigned long flags; |
245 | unsigned long timestamp_unavailable; | ||
245 | struct nfs4_deviceid deviceid; | 246 | struct nfs4_deviceid deviceid; |
246 | atomic_t ref; | 247 | atomic_t ref; |
247 | }; | 248 | }; |
@@ -254,34 +255,12 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, | |||
254 | const struct nfs4_deviceid *); | 255 | const struct nfs4_deviceid *); |
255 | struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); | 256 | struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); |
256 | bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); | 257 | bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); |
258 | void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); | ||
259 | bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); | ||
257 | void nfs4_deviceid_purge_client(const struct nfs_client *); | 260 | void nfs4_deviceid_purge_client(const struct nfs_client *); |
258 | 261 | ||
259 | static inline void | ||
260 | pnfs_mark_layout_returned(struct pnfs_layout_hdr *lo) | ||
261 | { | ||
262 | set_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); | ||
263 | } | ||
264 | |||
265 | static inline void | ||
266 | pnfs_clear_layout_returned(struct pnfs_layout_hdr *lo) | ||
267 | { | ||
268 | clear_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); | ||
269 | } | ||
270 | |||
271 | static inline bool | ||
272 | pnfs_test_layout_returned(struct pnfs_layout_hdr *lo) | ||
273 | { | ||
274 | return test_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); | ||
275 | } | ||
276 | |||
277 | static inline int lo_fail_bit(u32 iomode) | ||
278 | { | ||
279 | return iomode == IOMODE_RW ? | ||
280 | NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; | ||
281 | } | ||
282 | |||
283 | static inline struct pnfs_layout_segment * | 262 | static inline struct pnfs_layout_segment * |
284 | get_lseg(struct pnfs_layout_segment *lseg) | 263 | pnfs_get_lseg(struct pnfs_layout_segment *lseg) |
285 | { | 264 | { |
286 | if (lseg) { | 265 | if (lseg) { |
287 | atomic_inc(&lseg->pls_refcount); | 266 | atomic_inc(&lseg->pls_refcount); |
@@ -406,12 +385,12 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi) | |||
406 | } | 385 | } |
407 | 386 | ||
408 | static inline struct pnfs_layout_segment * | 387 | static inline struct pnfs_layout_segment * |
409 | get_lseg(struct pnfs_layout_segment *lseg) | 388 | pnfs_get_lseg(struct pnfs_layout_segment *lseg) |
410 | { | 389 | { |
411 | return NULL; | 390 | return NULL; |
412 | } | 391 | } |
413 | 392 | ||
414 | static inline void put_lseg(struct pnfs_layout_segment *lseg) | 393 | static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg) |
415 | { | 394 | { |
416 | } | 395 | } |
417 | 396 | ||
@@ -443,7 +422,7 @@ pnfs_roc_set_barrier(struct inode *ino, u32 barrier) | |||
443 | } | 422 | } |
444 | 423 | ||
445 | static inline bool | 424 | static inline bool |
446 | pnfs_roc_drain(struct inode *ino, u32 *barrier) | 425 | pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) |
447 | { | 426 | { |
448 | return false; | 427 | return false; |
449 | } | 428 | } |
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 73f701f1f4d3..d35b62e83ea6 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c | |||
@@ -40,6 +40,8 @@ | |||
40 | #define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) | 40 | #define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) |
41 | #define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) | 41 | #define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) |
42 | 42 | ||
43 | #define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ) | ||
44 | |||
43 | static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; | 45 | static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; |
44 | static DEFINE_SPINLOCK(nfs4_deviceid_lock); | 46 | static DEFINE_SPINLOCK(nfs4_deviceid_lock); |
45 | 47 | ||
@@ -218,6 +220,30 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) | |||
218 | } | 220 | } |
219 | EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node); | 221 | EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node); |
220 | 222 | ||
223 | void | ||
224 | nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node) | ||
225 | { | ||
226 | node->timestamp_unavailable = jiffies; | ||
227 | set_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); | ||
228 | } | ||
229 | EXPORT_SYMBOL_GPL(nfs4_mark_deviceid_unavailable); | ||
230 | |||
231 | bool | ||
232 | nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node) | ||
233 | { | ||
234 | if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) { | ||
235 | unsigned long start, end; | ||
236 | |||
237 | end = jiffies; | ||
238 | start = end - PNFS_DEVICE_RETRY_TIMEOUT; | ||
239 | if (time_in_range(node->timestamp_unavailable, start, end)) | ||
240 | return true; | ||
241 | clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); | ||
242 | } | ||
243 | return false; | ||
244 | } | ||
245 | EXPORT_SYMBOL_GPL(nfs4_test_deviceid_unavailable); | ||
246 | |||
221 | static void | 247 | static void |
222 | _deviceid_purge_client(const struct nfs_client *clp, long hash) | 248 | _deviceid_purge_client(const struct nfs_client *clp, long hash) |
223 | { | 249 | { |
@@ -276,3 +302,4 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) | |||
276 | } | 302 | } |
277 | rcu_read_unlock(); | 303 | rcu_read_unlock(); |
278 | } | 304 | } |
305 | |||
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d2c7f5db0847..e831bce49766 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -88,6 +88,7 @@ enum { | |||
88 | Opt_sharecache, Opt_nosharecache, | 88 | Opt_sharecache, Opt_nosharecache, |
89 | Opt_resvport, Opt_noresvport, | 89 | Opt_resvport, Opt_noresvport, |
90 | Opt_fscache, Opt_nofscache, | 90 | Opt_fscache, Opt_nofscache, |
91 | Opt_migration, Opt_nomigration, | ||
91 | 92 | ||
92 | /* Mount options that take integer arguments */ | 93 | /* Mount options that take integer arguments */ |
93 | Opt_port, | 94 | Opt_port, |
@@ -147,6 +148,8 @@ static const match_table_t nfs_mount_option_tokens = { | |||
147 | { Opt_noresvport, "noresvport" }, | 148 | { Opt_noresvport, "noresvport" }, |
148 | { Opt_fscache, "fsc" }, | 149 | { Opt_fscache, "fsc" }, |
149 | { Opt_nofscache, "nofsc" }, | 150 | { Opt_nofscache, "nofsc" }, |
151 | { Opt_migration, "migration" }, | ||
152 | { Opt_nomigration, "nomigration" }, | ||
150 | 153 | ||
151 | { Opt_port, "port=%s" }, | 154 | { Opt_port, "port=%s" }, |
152 | { Opt_rsize, "rsize=%s" }, | 155 | { Opt_rsize, "rsize=%s" }, |
@@ -676,6 +679,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, | |||
676 | if (nfss->options & NFS_OPTION_FSCACHE) | 679 | if (nfss->options & NFS_OPTION_FSCACHE) |
677 | seq_printf(m, ",fsc"); | 680 | seq_printf(m, ",fsc"); |
678 | 681 | ||
682 | if (nfss->options & NFS_OPTION_MIGRATION) | ||
683 | seq_printf(m, ",migration"); | ||
684 | |||
679 | if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { | 685 | if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { |
680 | if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) | 686 | if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) |
681 | seq_printf(m, ",lookupcache=none"); | 687 | seq_printf(m, ",lookupcache=none"); |
@@ -1106,7 +1112,7 @@ static int nfs_get_option_ul(substring_t args[], unsigned long *option) | |||
1106 | string = match_strdup(args); | 1112 | string = match_strdup(args); |
1107 | if (string == NULL) | 1113 | if (string == NULL) |
1108 | return -ENOMEM; | 1114 | return -ENOMEM; |
1109 | rc = strict_strtoul(string, 10, option); | 1115 | rc = kstrtoul(string, 10, option); |
1110 | kfree(string); | 1116 | kfree(string); |
1111 | 1117 | ||
1112 | return rc; | 1118 | return rc; |
@@ -1243,6 +1249,12 @@ static int nfs_parse_mount_options(char *raw, | |||
1243 | kfree(mnt->fscache_uniq); | 1249 | kfree(mnt->fscache_uniq); |
1244 | mnt->fscache_uniq = NULL; | 1250 | mnt->fscache_uniq = NULL; |
1245 | break; | 1251 | break; |
1252 | case Opt_migration: | ||
1253 | mnt->options |= NFS_OPTION_MIGRATION; | ||
1254 | break; | ||
1255 | case Opt_nomigration: | ||
1256 | mnt->options &= NFS_OPTION_MIGRATION; | ||
1257 | break; | ||
1246 | 1258 | ||
1247 | /* | 1259 | /* |
1248 | * options that take numeric values | 1260 | * options that take numeric values |
@@ -1535,6 +1547,10 @@ static int nfs_parse_mount_options(char *raw, | |||
1535 | if (mnt->minorversion && mnt->version != 4) | 1547 | if (mnt->minorversion && mnt->version != 4) |
1536 | goto out_minorversion_mismatch; | 1548 | goto out_minorversion_mismatch; |
1537 | 1549 | ||
1550 | if (mnt->options & NFS_OPTION_MIGRATION && | ||
1551 | mnt->version != 4 && mnt->minorversion != 0) | ||
1552 | goto out_migration_misuse; | ||
1553 | |||
1538 | /* | 1554 | /* |
1539 | * verify that any proto=/mountproto= options match the address | 1555 | * verify that any proto=/mountproto= options match the address |
1540 | * families in the addr=/mountaddr= options. | 1556 | * families in the addr=/mountaddr= options. |
@@ -1572,6 +1588,10 @@ out_minorversion_mismatch: | |||
1572 | printk(KERN_INFO "NFS: mount option vers=%u does not support " | 1588 | printk(KERN_INFO "NFS: mount option vers=%u does not support " |
1573 | "minorversion=%u\n", mnt->version, mnt->minorversion); | 1589 | "minorversion=%u\n", mnt->version, mnt->minorversion); |
1574 | return 0; | 1590 | return 0; |
1591 | out_migration_misuse: | ||
1592 | printk(KERN_INFO | ||
1593 | "NFS: 'migration' not supported for this NFS version\n"); | ||
1594 | return 0; | ||
1575 | out_nomem: | 1595 | out_nomem: |
1576 | printk(KERN_INFO "NFS: not enough memory to parse option\n"); | 1596 | printk(KERN_INFO "NFS: not enough memory to parse option\n"); |
1577 | return 0; | 1597 | return 0; |
@@ -2494,7 +2514,7 @@ EXPORT_SYMBOL_GPL(nfs_kill_super); | |||
2494 | /* | 2514 | /* |
2495 | * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) | 2515 | * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) |
2496 | */ | 2516 | */ |
2497 | struct dentry * | 2517 | static struct dentry * |
2498 | nfs_xdev_mount(struct file_system_type *fs_type, int flags, | 2518 | nfs_xdev_mount(struct file_system_type *fs_type, int flags, |
2499 | const char *dev_name, void *raw_data) | 2519 | const char *dev_name, void *raw_data) |
2500 | { | 2520 | { |
@@ -2642,6 +2662,7 @@ unsigned int nfs_idmap_cache_timeout = 600; | |||
2642 | bool nfs4_disable_idmapping = true; | 2662 | bool nfs4_disable_idmapping = true; |
2643 | unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; | 2663 | unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; |
2644 | unsigned short send_implementation_id = 1; | 2664 | unsigned short send_implementation_id = 1; |
2665 | char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = ""; | ||
2645 | 2666 | ||
2646 | EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); | 2667 | EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); |
2647 | EXPORT_SYMBOL_GPL(nfs_callback_tcpport); | 2668 | EXPORT_SYMBOL_GPL(nfs_callback_tcpport); |
@@ -2649,6 +2670,7 @@ EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout); | |||
2649 | EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); | 2670 | EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); |
2650 | EXPORT_SYMBOL_GPL(max_session_slots); | 2671 | EXPORT_SYMBOL_GPL(max_session_slots); |
2651 | EXPORT_SYMBOL_GPL(send_implementation_id); | 2672 | EXPORT_SYMBOL_GPL(send_implementation_id); |
2673 | EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier); | ||
2652 | 2674 | ||
2653 | #define NFS_CALLBACK_MAXPORTNR (65535U) | 2675 | #define NFS_CALLBACK_MAXPORTNR (65535U) |
2654 | 2676 | ||
@@ -2659,7 +2681,7 @@ static int param_set_portnr(const char *val, const struct kernel_param *kp) | |||
2659 | 2681 | ||
2660 | if (!val) | 2682 | if (!val) |
2661 | return -EINVAL; | 2683 | return -EINVAL; |
2662 | ret = strict_strtoul(val, 0, &num); | 2684 | ret = kstrtoul(val, 0, &num); |
2663 | if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) | 2685 | if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) |
2664 | return -EINVAL; | 2686 | return -EINVAL; |
2665 | *((unsigned int *)kp->arg) = num; | 2687 | *((unsigned int *)kp->arg) = num; |
@@ -2674,6 +2696,8 @@ static struct kernel_param_ops param_ops_portnr = { | |||
2674 | module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); | 2696 | module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); |
2675 | module_param(nfs_idmap_cache_timeout, int, 0644); | 2697 | module_param(nfs_idmap_cache_timeout, int, 0644); |
2676 | module_param(nfs4_disable_idmapping, bool, 0644); | 2698 | module_param(nfs4_disable_idmapping, bool, 0644); |
2699 | module_param_string(nfs4_unique_id, nfs4_client_id_uniquifier, | ||
2700 | NFS4_CLIENT_ID_UNIQ_LEN, 0600); | ||
2677 | MODULE_PARM_DESC(nfs4_disable_idmapping, | 2701 | MODULE_PARM_DESC(nfs4_disable_idmapping, |
2678 | "Turn off NFSv4 idmapping when using 'sec=sys'"); | 2702 | "Turn off NFSv4 idmapping when using 'sec=sys'"); |
2679 | module_param(max_session_slots, ushort, 0644); | 2703 | module_param(max_session_slots, ushort, 0644); |
@@ -2682,6 +2706,7 @@ MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " | |||
2682 | module_param(send_implementation_id, ushort, 0644); | 2706 | module_param(send_implementation_id, ushort, 0644); |
2683 | MODULE_PARM_DESC(send_implementation_id, | 2707 | MODULE_PARM_DESC(send_implementation_id, |
2684 | "Send implementation ID with NFSv4.1 exchange_id"); | 2708 | "Send implementation ID with NFSv4.1 exchange_id"); |
2709 | MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string"); | ||
2685 | MODULE_ALIAS("nfs4"); | 2710 | MODULE_ALIAS("nfs4"); |
2686 | 2711 | ||
2687 | #endif /* CONFIG_NFS_V4 */ | 2712 | #endif /* CONFIG_NFS_V4 */ |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e3b55372726c..9347ab7c9574 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -846,6 +846,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, | |||
846 | int nfs_flush_incompatible(struct file *file, struct page *page) | 846 | int nfs_flush_incompatible(struct file *file, struct page *page) |
847 | { | 847 | { |
848 | struct nfs_open_context *ctx = nfs_file_open_context(file); | 848 | struct nfs_open_context *ctx = nfs_file_open_context(file); |
849 | struct nfs_lock_context *l_ctx; | ||
849 | struct nfs_page *req; | 850 | struct nfs_page *req; |
850 | int do_flush, status; | 851 | int do_flush, status; |
851 | /* | 852 | /* |
@@ -860,9 +861,12 @@ int nfs_flush_incompatible(struct file *file, struct page *page) | |||
860 | req = nfs_page_find_request(page); | 861 | req = nfs_page_find_request(page); |
861 | if (req == NULL) | 862 | if (req == NULL) |
862 | return 0; | 863 | return 0; |
863 | do_flush = req->wb_page != page || req->wb_context != ctx || | 864 | l_ctx = req->wb_lock_context; |
864 | req->wb_lock_context->lockowner != current->files || | 865 | do_flush = req->wb_page != page || req->wb_context != ctx; |
865 | req->wb_lock_context->pid != current->tgid; | 866 | if (l_ctx) { |
867 | do_flush |= l_ctx->lockowner.l_owner != current->files | ||
868 | || l_ctx->lockowner.l_pid != current->tgid; | ||
869 | } | ||
866 | nfs_release_request(req); | 870 | nfs_release_request(req); |
867 | if (!do_flush) | 871 | if (!do_flush) |
868 | return 0; | 872 | return 0; |
@@ -1576,6 +1580,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) | |||
1576 | /* We have a mismatch. Write the page again */ | 1580 | /* We have a mismatch. Write the page again */ |
1577 | dprintk(" mismatch\n"); | 1581 | dprintk(" mismatch\n"); |
1578 | nfs_mark_request_dirty(req); | 1582 | nfs_mark_request_dirty(req); |
1583 | set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); | ||
1579 | next: | 1584 | next: |
1580 | nfs_unlock_and_release_request(req); | 1585 | nfs_unlock_and_release_request(req); |
1581 | } | 1586 | } |
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 6aa5590c3679..b314888825d5 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c | |||
@@ -218,8 +218,7 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, | |||
218 | * There must be an encoding function for void results so svc_process | 218 | * There must be an encoding function for void results so svc_process |
219 | * will work properly. | 219 | * will work properly. |
220 | */ | 220 | */ |
221 | int | 221 | static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) |
222 | nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) | ||
223 | { | 222 | { |
224 | return xdr_ressize_check(rqstp, p); | 223 | return xdr_ressize_check(rqstp, p); |
225 | } | 224 | } |
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 9095f3c21df9..97d90d1c8608 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c | |||
@@ -247,7 +247,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, | |||
247 | /* Now create the file and set attributes */ | 247 | /* Now create the file and set attributes */ |
248 | nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len, | 248 | nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len, |
249 | attr, newfhp, | 249 | attr, newfhp, |
250 | argp->createmode, argp->verf, NULL, NULL); | 250 | argp->createmode, (u32 *)argp->verf, NULL, NULL); |
251 | 251 | ||
252 | RETURN_STATUS(nfserr); | 252 | RETURN_STATUS(nfserr); |
253 | } | 253 | } |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 4c7bd35b1876..bdf29c96e4cd 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -1028,7 +1028,6 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) | |||
1028 | cb->cb_msg.rpc_cred = callback_cred; | 1028 | cb->cb_msg.rpc_cred = callback_cred; |
1029 | 1029 | ||
1030 | cb->cb_ops = &nfsd4_cb_recall_ops; | 1030 | cb->cb_ops = &nfsd4_cb_recall_ops; |
1031 | dp->dl_retries = 1; | ||
1032 | 1031 | ||
1033 | INIT_LIST_HEAD(&cb->cb_per_client); | 1032 | INIT_LIST_HEAD(&cb->cb_per_client); |
1034 | cb->cb_done = true; | 1033 | cb->cb_done = true; |
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index fdc91a6fc9c4..a1f10c0a6255 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
@@ -478,7 +478,7 @@ nfsd_idmap_init(struct net *net) | |||
478 | goto destroy_idtoname_cache; | 478 | goto destroy_idtoname_cache; |
479 | nn->nametoid_cache = cache_create_net(&nametoid_cache_template, net); | 479 | nn->nametoid_cache = cache_create_net(&nametoid_cache_template, net); |
480 | if (IS_ERR(nn->nametoid_cache)) { | 480 | if (IS_ERR(nn->nametoid_cache)) { |
481 | rv = PTR_ERR(nn->idtoname_cache); | 481 | rv = PTR_ERR(nn->nametoid_cache); |
482 | goto unregister_idtoname_cache; | 482 | goto unregister_idtoname_cache; |
483 | } | 483 | } |
484 | rv = cache_register_net(nn->nametoid_cache, net); | 484 | rv = cache_register_net(nn->nametoid_cache, net); |
@@ -598,7 +598,7 @@ numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namel | |||
598 | /* Just to make sure it's null-terminated: */ | 598 | /* Just to make sure it's null-terminated: */ |
599 | memcpy(buf, name, namelen); | 599 | memcpy(buf, name, namelen); |
600 | buf[namelen] = '\0'; | 600 | buf[namelen] = '\0'; |
601 | ret = kstrtouint(name, 10, id); | 601 | ret = kstrtouint(buf, 10, id); |
602 | return ret == 0; | 602 | return ret == 0; |
603 | } | 603 | } |
604 | 604 | ||
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c9c1c0a25417..6c9a4b291dba 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -370,7 +370,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
370 | break; | 370 | break; |
371 | case NFS4_OPEN_CLAIM_PREVIOUS: | 371 | case NFS4_OPEN_CLAIM_PREVIOUS: |
372 | open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; | 372 | open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; |
373 | status = nfs4_check_open_reclaim(&open->op_clientid); | 373 | status = nfs4_check_open_reclaim(&open->op_clientid, cstate->minorversion); |
374 | if (status) | 374 | if (status) |
375 | goto out; | 375 | goto out; |
376 | case NFS4_OPEN_CLAIM_FH: | 376 | case NFS4_OPEN_CLAIM_FH: |
@@ -1054,8 +1054,8 @@ struct nfsd4_operation { | |||
1054 | char *op_name; | 1054 | char *op_name; |
1055 | /* Try to get response size before operation */ | 1055 | /* Try to get response size before operation */ |
1056 | nfsd4op_rsize op_rsize_bop; | 1056 | nfsd4op_rsize op_rsize_bop; |
1057 | stateid_setter op_get_currentstateid; | 1057 | stateid_getter op_get_currentstateid; |
1058 | stateid_getter op_set_currentstateid; | 1058 | stateid_setter op_set_currentstateid; |
1059 | }; | 1059 | }; |
1060 | 1060 | ||
1061 | static struct nfsd4_operation nfsd4_ops[]; | 1061 | static struct nfsd4_operation nfsd4_ops[]; |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 48a1bad37334..d0237f872cc4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -758,7 +758,7 @@ static void nfsd4_put_drc_mem(int slotsize, int num) | |||
758 | spin_unlock(&nfsd_drc_lock); | 758 | spin_unlock(&nfsd_drc_lock); |
759 | } | 759 | } |
760 | 760 | ||
761 | static struct nfsd4_session *alloc_session(int slotsize, int numslots) | 761 | static struct nfsd4_session *__alloc_session(int slotsize, int numslots) |
762 | { | 762 | { |
763 | struct nfsd4_session *new; | 763 | struct nfsd4_session *new; |
764 | int mem, i; | 764 | int mem, i; |
@@ -852,35 +852,28 @@ static int nfsd4_register_conn(struct nfsd4_conn *conn) | |||
852 | return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); | 852 | return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); |
853 | } | 853 | } |
854 | 854 | ||
855 | static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses, u32 dir) | 855 | static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn, struct nfsd4_session *ses) |
856 | { | 856 | { |
857 | struct nfsd4_conn *conn; | ||
858 | int ret; | 857 | int ret; |
859 | 858 | ||
860 | conn = alloc_conn(rqstp, dir); | ||
861 | if (!conn) | ||
862 | return nfserr_jukebox; | ||
863 | nfsd4_hash_conn(conn, ses); | 859 | nfsd4_hash_conn(conn, ses); |
864 | ret = nfsd4_register_conn(conn); | 860 | ret = nfsd4_register_conn(conn); |
865 | if (ret) | 861 | if (ret) |
866 | /* oops; xprt is already down: */ | 862 | /* oops; xprt is already down: */ |
867 | nfsd4_conn_lost(&conn->cn_xpt_user); | 863 | nfsd4_conn_lost(&conn->cn_xpt_user); |
868 | if (ses->se_client->cl_cb_state == NFSD4_CB_DOWN && | 864 | if (conn->cn_flags & NFS4_CDFC4_BACK) { |
869 | dir & NFS4_CDFC4_BACK) { | ||
870 | /* callback channel may be back up */ | 865 | /* callback channel may be back up */ |
871 | nfsd4_probe_callback(ses->se_client); | 866 | nfsd4_probe_callback(ses->se_client); |
872 | } | 867 | } |
873 | return nfs_ok; | ||
874 | } | 868 | } |
875 | 869 | ||
876 | static __be32 nfsd4_new_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_session *ses) | 870 | static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses) |
877 | { | 871 | { |
878 | u32 dir = NFS4_CDFC4_FORE; | 872 | u32 dir = NFS4_CDFC4_FORE; |
879 | 873 | ||
880 | if (ses->se_flags & SESSION4_BACK_CHAN) | 874 | if (cses->flags & SESSION4_BACK_CHAN) |
881 | dir |= NFS4_CDFC4_BACK; | 875 | dir |= NFS4_CDFC4_BACK; |
882 | 876 | return alloc_conn(rqstp, dir); | |
883 | return nfsd4_new_conn(rqstp, ses, dir); | ||
884 | } | 877 | } |
885 | 878 | ||
886 | /* must be called under client_lock */ | 879 | /* must be called under client_lock */ |
@@ -903,20 +896,21 @@ static void nfsd4_del_conns(struct nfsd4_session *s) | |||
903 | spin_unlock(&clp->cl_lock); | 896 | spin_unlock(&clp->cl_lock); |
904 | } | 897 | } |
905 | 898 | ||
899 | static void __free_session(struct nfsd4_session *ses) | ||
900 | { | ||
901 | nfsd4_put_drc_mem(slot_bytes(&ses->se_fchannel), ses->se_fchannel.maxreqs); | ||
902 | free_session_slots(ses); | ||
903 | kfree(ses); | ||
904 | } | ||
905 | |||
906 | static void free_session(struct kref *kref) | 906 | static void free_session(struct kref *kref) |
907 | { | 907 | { |
908 | struct nfsd4_session *ses; | 908 | struct nfsd4_session *ses; |
909 | int mem; | ||
910 | 909 | ||
911 | lockdep_assert_held(&client_lock); | 910 | lockdep_assert_held(&client_lock); |
912 | ses = container_of(kref, struct nfsd4_session, se_ref); | 911 | ses = container_of(kref, struct nfsd4_session, se_ref); |
913 | nfsd4_del_conns(ses); | 912 | nfsd4_del_conns(ses); |
914 | spin_lock(&nfsd_drc_lock); | 913 | __free_session(ses); |
915 | mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel); | ||
916 | nfsd_drc_mem_used -= mem; | ||
917 | spin_unlock(&nfsd_drc_lock); | ||
918 | free_session_slots(ses); | ||
919 | kfree(ses); | ||
920 | } | 914 | } |
921 | 915 | ||
922 | void nfsd4_put_session(struct nfsd4_session *ses) | 916 | void nfsd4_put_session(struct nfsd4_session *ses) |
@@ -926,14 +920,10 @@ void nfsd4_put_session(struct nfsd4_session *ses) | |||
926 | spin_unlock(&client_lock); | 920 | spin_unlock(&client_lock); |
927 | } | 921 | } |
928 | 922 | ||
929 | static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses) | 923 | static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan) |
930 | { | 924 | { |
931 | struct nfsd4_session *new; | 925 | struct nfsd4_session *new; |
932 | struct nfsd4_channel_attrs *fchan = &cses->fore_channel; | ||
933 | int numslots, slotsize; | 926 | int numslots, slotsize; |
934 | __be32 status; | ||
935 | int idx; | ||
936 | |||
937 | /* | 927 | /* |
938 | * Note decreasing slot size below client's request may | 928 | * Note decreasing slot size below client's request may |
939 | * make it difficult for client to function correctly, whereas | 929 | * make it difficult for client to function correctly, whereas |
@@ -946,12 +936,18 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n | |||
946 | if (numslots < 1) | 936 | if (numslots < 1) |
947 | return NULL; | 937 | return NULL; |
948 | 938 | ||
949 | new = alloc_session(slotsize, numslots); | 939 | new = __alloc_session(slotsize, numslots); |
950 | if (!new) { | 940 | if (!new) { |
951 | nfsd4_put_drc_mem(slotsize, fchan->maxreqs); | 941 | nfsd4_put_drc_mem(slotsize, fchan->maxreqs); |
952 | return NULL; | 942 | return NULL; |
953 | } | 943 | } |
954 | init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize); | 944 | init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize); |
945 | return new; | ||
946 | } | ||
947 | |||
948 | static struct nfsd4_session *init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) | ||
949 | { | ||
950 | int idx; | ||
955 | 951 | ||
956 | new->se_client = clp; | 952 | new->se_client = clp; |
957 | gen_sessionid(new); | 953 | gen_sessionid(new); |
@@ -970,14 +966,6 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n | |||
970 | spin_unlock(&clp->cl_lock); | 966 | spin_unlock(&clp->cl_lock); |
971 | spin_unlock(&client_lock); | 967 | spin_unlock(&client_lock); |
972 | 968 | ||
973 | status = nfsd4_new_conn_from_crses(rqstp, new); | ||
974 | /* whoops: benny points out, status is ignored! (err, or bogus) */ | ||
975 | if (status) { | ||
976 | spin_lock(&client_lock); | ||
977 | free_session(&new->se_ref); | ||
978 | spin_unlock(&client_lock); | ||
979 | return NULL; | ||
980 | } | ||
981 | if (cses->flags & SESSION4_BACK_CHAN) { | 969 | if (cses->flags & SESSION4_BACK_CHAN) { |
982 | struct sockaddr *sa = svc_addr(rqstp); | 970 | struct sockaddr *sa = svc_addr(rqstp); |
983 | /* | 971 | /* |
@@ -990,7 +978,6 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n | |||
990 | rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); | 978 | rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); |
991 | clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); | 979 | clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); |
992 | } | 980 | } |
993 | nfsd4_probe_callback(clp); | ||
994 | return new; | 981 | return new; |
995 | } | 982 | } |
996 | 983 | ||
@@ -1131,7 +1118,7 @@ unhash_client_locked(struct nfs4_client *clp) | |||
1131 | } | 1118 | } |
1132 | 1119 | ||
1133 | static void | 1120 | static void |
1134 | expire_client(struct nfs4_client *clp) | 1121 | destroy_client(struct nfs4_client *clp) |
1135 | { | 1122 | { |
1136 | struct nfs4_openowner *oo; | 1123 | struct nfs4_openowner *oo; |
1137 | struct nfs4_delegation *dp; | 1124 | struct nfs4_delegation *dp; |
@@ -1165,6 +1152,12 @@ expire_client(struct nfs4_client *clp) | |||
1165 | spin_unlock(&client_lock); | 1152 | spin_unlock(&client_lock); |
1166 | } | 1153 | } |
1167 | 1154 | ||
1155 | static void expire_client(struct nfs4_client *clp) | ||
1156 | { | ||
1157 | nfsd4_client_record_remove(clp); | ||
1158 | destroy_client(clp); | ||
1159 | } | ||
1160 | |||
1168 | static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) | 1161 | static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) |
1169 | { | 1162 | { |
1170 | memcpy(target->cl_verifier.data, source->data, | 1163 | memcpy(target->cl_verifier.data, source->data, |
@@ -1223,10 +1216,26 @@ static bool groups_equal(struct group_info *g1, struct group_info *g2) | |||
1223 | return true; | 1216 | return true; |
1224 | } | 1217 | } |
1225 | 1218 | ||
1219 | /* | ||
1220 | * RFC 3530 language requires clid_inuse be returned when the | ||
1221 | * "principal" associated with a requests differs from that previously | ||
1222 | * used. We use uid, gid's, and gss principal string as our best | ||
1223 | * approximation. We also don't want to allow non-gss use of a client | ||
1224 | * established using gss: in theory cr_principal should catch that | ||
1225 | * change, but in practice cr_principal can be null even in the gss case | ||
1226 | * since gssd doesn't always pass down a principal string. | ||
1227 | */ | ||
1228 | static bool is_gss_cred(struct svc_cred *cr) | ||
1229 | { | ||
1230 | /* Is cr_flavor one of the gss "pseudoflavors"?: */ | ||
1231 | return (cr->cr_flavor > RPC_AUTH_MAXFLAVOR); | ||
1232 | } | ||
1233 | |||
1234 | |||
1226 | static bool | 1235 | static bool |
1227 | same_creds(struct svc_cred *cr1, struct svc_cred *cr2) | 1236 | same_creds(struct svc_cred *cr1, struct svc_cred *cr2) |
1228 | { | 1237 | { |
1229 | if ((cr1->cr_flavor != cr2->cr_flavor) | 1238 | if ((is_gss_cred(cr1) != is_gss_cred(cr2)) |
1230 | || (cr1->cr_uid != cr2->cr_uid) | 1239 | || (cr1->cr_uid != cr2->cr_uid) |
1231 | || (cr1->cr_gid != cr2->cr_gid) | 1240 | || (cr1->cr_gid != cr2->cr_gid) |
1232 | || !groups_equal(cr1->cr_group_info, cr2->cr_group_info)) | 1241 | || !groups_equal(cr1->cr_group_info, cr2->cr_group_info)) |
@@ -1340,13 +1349,15 @@ move_to_confirmed(struct nfs4_client *clp) | |||
1340 | } | 1349 | } |
1341 | 1350 | ||
1342 | static struct nfs4_client * | 1351 | static struct nfs4_client * |
1343 | find_confirmed_client(clientid_t *clid) | 1352 | find_confirmed_client(clientid_t *clid, bool sessions) |
1344 | { | 1353 | { |
1345 | struct nfs4_client *clp; | 1354 | struct nfs4_client *clp; |
1346 | unsigned int idhashval = clientid_hashval(clid->cl_id); | 1355 | unsigned int idhashval = clientid_hashval(clid->cl_id); |
1347 | 1356 | ||
1348 | list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { | 1357 | list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { |
1349 | if (same_clid(&clp->cl_clientid, clid)) { | 1358 | if (same_clid(&clp->cl_clientid, clid)) { |
1359 | if ((bool)clp->cl_minorversion != sessions) | ||
1360 | return NULL; | ||
1350 | renew_client(clp); | 1361 | renew_client(clp); |
1351 | return clp; | 1362 | return clp; |
1352 | } | 1363 | } |
@@ -1355,14 +1366,17 @@ find_confirmed_client(clientid_t *clid) | |||
1355 | } | 1366 | } |
1356 | 1367 | ||
1357 | static struct nfs4_client * | 1368 | static struct nfs4_client * |
1358 | find_unconfirmed_client(clientid_t *clid) | 1369 | find_unconfirmed_client(clientid_t *clid, bool sessions) |
1359 | { | 1370 | { |
1360 | struct nfs4_client *clp; | 1371 | struct nfs4_client *clp; |
1361 | unsigned int idhashval = clientid_hashval(clid->cl_id); | 1372 | unsigned int idhashval = clientid_hashval(clid->cl_id); |
1362 | 1373 | ||
1363 | list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) { | 1374 | list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) { |
1364 | if (same_clid(&clp->cl_clientid, clid)) | 1375 | if (same_clid(&clp->cl_clientid, clid)) { |
1376 | if ((bool)clp->cl_minorversion != sessions) | ||
1377 | return NULL; | ||
1365 | return clp; | 1378 | return clp; |
1379 | } | ||
1366 | } | 1380 | } |
1367 | return NULL; | 1381 | return NULL; |
1368 | } | 1382 | } |
@@ -1651,6 +1665,7 @@ out_new: | |||
1651 | status = nfserr_jukebox; | 1665 | status = nfserr_jukebox; |
1652 | goto out; | 1666 | goto out; |
1653 | } | 1667 | } |
1668 | new->cl_minorversion = 1; | ||
1654 | 1669 | ||
1655 | gen_clid(new); | 1670 | gen_clid(new); |
1656 | add_to_unconfirmed(new, strhashval); | 1671 | add_to_unconfirmed(new, strhashval); |
@@ -1743,67 +1758,71 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1743 | struct sockaddr *sa = svc_addr(rqstp); | 1758 | struct sockaddr *sa = svc_addr(rqstp); |
1744 | struct nfs4_client *conf, *unconf; | 1759 | struct nfs4_client *conf, *unconf; |
1745 | struct nfsd4_session *new; | 1760 | struct nfsd4_session *new; |
1761 | struct nfsd4_conn *conn; | ||
1746 | struct nfsd4_clid_slot *cs_slot = NULL; | 1762 | struct nfsd4_clid_slot *cs_slot = NULL; |
1747 | bool confirm_me = false; | ||
1748 | __be32 status = 0; | 1763 | __be32 status = 0; |
1749 | 1764 | ||
1750 | if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) | 1765 | if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) |
1751 | return nfserr_inval; | 1766 | return nfserr_inval; |
1767 | if (check_forechannel_attrs(cr_ses->fore_channel)) | ||
1768 | return nfserr_toosmall; | ||
1769 | new = alloc_session(&cr_ses->fore_channel); | ||
1770 | if (!new) | ||
1771 | return nfserr_jukebox; | ||
1772 | status = nfserr_jukebox; | ||
1773 | conn = alloc_conn_from_crses(rqstp, cr_ses); | ||
1774 | if (!conn) | ||
1775 | goto out_free_session; | ||
1752 | 1776 | ||
1753 | nfs4_lock_state(); | 1777 | nfs4_lock_state(); |
1754 | unconf = find_unconfirmed_client(&cr_ses->clientid); | 1778 | unconf = find_unconfirmed_client(&cr_ses->clientid, true); |
1755 | conf = find_confirmed_client(&cr_ses->clientid); | 1779 | conf = find_confirmed_client(&cr_ses->clientid, true); |
1756 | 1780 | ||
1757 | if (conf) { | 1781 | if (conf) { |
1758 | cs_slot = &conf->cl_cs_slot; | 1782 | cs_slot = &conf->cl_cs_slot; |
1759 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); | 1783 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); |
1760 | if (status == nfserr_replay_cache) { | 1784 | if (status == nfserr_replay_cache) { |
1761 | status = nfsd4_replay_create_session(cr_ses, cs_slot); | 1785 | status = nfsd4_replay_create_session(cr_ses, cs_slot); |
1762 | goto out; | 1786 | goto out_free_conn; |
1763 | } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { | 1787 | } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { |
1764 | status = nfserr_seq_misordered; | 1788 | status = nfserr_seq_misordered; |
1765 | goto out; | 1789 | goto out_free_conn; |
1766 | } | 1790 | } |
1767 | } else if (unconf) { | 1791 | } else if (unconf) { |
1792 | unsigned int hash; | ||
1793 | struct nfs4_client *old; | ||
1768 | if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || | 1794 | if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || |
1769 | !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { | 1795 | !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { |
1770 | status = nfserr_clid_inuse; | 1796 | status = nfserr_clid_inuse; |
1771 | goto out; | 1797 | goto out_free_conn; |
1772 | } | 1798 | } |
1773 | cs_slot = &unconf->cl_cs_slot; | 1799 | cs_slot = &unconf->cl_cs_slot; |
1774 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); | 1800 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); |
1775 | if (status) { | 1801 | if (status) { |
1776 | /* an unconfirmed replay returns misordered */ | 1802 | /* an unconfirmed replay returns misordered */ |
1777 | status = nfserr_seq_misordered; | 1803 | status = nfserr_seq_misordered; |
1778 | goto out; | 1804 | goto out_free_conn; |
1779 | } | 1805 | } |
1780 | confirm_me = true; | 1806 | hash = clientstr_hashval(unconf->cl_recdir); |
1807 | old = find_confirmed_client_by_str(unconf->cl_recdir, hash); | ||
1808 | if (old) | ||
1809 | expire_client(old); | ||
1810 | move_to_confirmed(unconf); | ||
1781 | conf = unconf; | 1811 | conf = unconf; |
1782 | } else { | 1812 | } else { |
1783 | status = nfserr_stale_clientid; | 1813 | status = nfserr_stale_clientid; |
1784 | goto out; | 1814 | goto out_free_conn; |
1785 | } | 1815 | } |
1786 | 1816 | status = nfs_ok; | |
1787 | /* | ||
1788 | * XXX: we should probably set this at creation time, and check | ||
1789 | * for consistent minorversion use throughout: | ||
1790 | */ | ||
1791 | conf->cl_minorversion = 1; | ||
1792 | /* | 1817 | /* |
1793 | * We do not support RDMA or persistent sessions | 1818 | * We do not support RDMA or persistent sessions |
1794 | */ | 1819 | */ |
1795 | cr_ses->flags &= ~SESSION4_PERSIST; | 1820 | cr_ses->flags &= ~SESSION4_PERSIST; |
1796 | cr_ses->flags &= ~SESSION4_RDMA; | 1821 | cr_ses->flags &= ~SESSION4_RDMA; |
1797 | 1822 | ||
1798 | status = nfserr_toosmall; | 1823 | init_session(rqstp, new, conf, cr_ses); |
1799 | if (check_forechannel_attrs(cr_ses->fore_channel)) | 1824 | nfsd4_init_conn(rqstp, conn, new); |
1800 | goto out; | ||
1801 | 1825 | ||
1802 | status = nfserr_jukebox; | ||
1803 | new = alloc_init_session(rqstp, conf, cr_ses); | ||
1804 | if (!new) | ||
1805 | goto out; | ||
1806 | status = nfs_ok; | ||
1807 | memcpy(cr_ses->sessionid.data, new->se_sessionid.data, | 1826 | memcpy(cr_ses->sessionid.data, new->se_sessionid.data, |
1808 | NFS4_MAX_SESSIONID_LEN); | 1827 | NFS4_MAX_SESSIONID_LEN); |
1809 | memcpy(&cr_ses->fore_channel, &new->se_fchannel, | 1828 | memcpy(&cr_ses->fore_channel, &new->se_fchannel, |
@@ -1813,18 +1832,15 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1813 | 1832 | ||
1814 | /* cache solo and embedded create sessions under the state lock */ | 1833 | /* cache solo and embedded create sessions under the state lock */ |
1815 | nfsd4_cache_create_session(cr_ses, cs_slot, status); | 1834 | nfsd4_cache_create_session(cr_ses, cs_slot, status); |
1816 | if (confirm_me) { | ||
1817 | unsigned int hash = clientstr_hashval(unconf->cl_recdir); | ||
1818 | struct nfs4_client *old = | ||
1819 | find_confirmed_client_by_str(conf->cl_recdir, hash); | ||
1820 | if (old) | ||
1821 | expire_client(old); | ||
1822 | move_to_confirmed(conf); | ||
1823 | } | ||
1824 | out: | 1835 | out: |
1825 | nfs4_unlock_state(); | 1836 | nfs4_unlock_state(); |
1826 | dprintk("%s returns %d\n", __func__, ntohl(status)); | 1837 | dprintk("%s returns %d\n", __func__, ntohl(status)); |
1827 | return status; | 1838 | return status; |
1839 | out_free_conn: | ||
1840 | free_conn(conn); | ||
1841 | out_free_session: | ||
1842 | __free_session(new); | ||
1843 | goto out; | ||
1828 | } | 1844 | } |
1829 | 1845 | ||
1830 | static bool nfsd4_last_compound_op(struct svc_rqst *rqstp) | 1846 | static bool nfsd4_last_compound_op(struct svc_rqst *rqstp) |
@@ -1854,6 +1870,7 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, | |||
1854 | struct nfsd4_bind_conn_to_session *bcts) | 1870 | struct nfsd4_bind_conn_to_session *bcts) |
1855 | { | 1871 | { |
1856 | __be32 status; | 1872 | __be32 status; |
1873 | struct nfsd4_conn *conn; | ||
1857 | 1874 | ||
1858 | if (!nfsd4_last_compound_op(rqstp)) | 1875 | if (!nfsd4_last_compound_op(rqstp)) |
1859 | return nfserr_not_only_op; | 1876 | return nfserr_not_only_op; |
@@ -1870,9 +1887,13 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, | |||
1870 | return nfserr_badsession; | 1887 | return nfserr_badsession; |
1871 | 1888 | ||
1872 | status = nfsd4_map_bcts_dir(&bcts->dir); | 1889 | status = nfsd4_map_bcts_dir(&bcts->dir); |
1873 | if (!status) | 1890 | if (status) |
1874 | nfsd4_new_conn(rqstp, cstate->session, bcts->dir); | 1891 | return status; |
1875 | return status; | 1892 | conn = alloc_conn(rqstp, bcts->dir); |
1893 | if (!conn) | ||
1894 | return nfserr_jukebox; | ||
1895 | nfsd4_init_conn(rqstp, conn, cstate->session); | ||
1896 | return nfs_ok; | ||
1876 | } | 1897 | } |
1877 | 1898 | ||
1878 | static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) | 1899 | static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) |
@@ -2085,8 +2106,8 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta | |||
2085 | __be32 status = 0; | 2106 | __be32 status = 0; |
2086 | 2107 | ||
2087 | nfs4_lock_state(); | 2108 | nfs4_lock_state(); |
2088 | unconf = find_unconfirmed_client(&dc->clientid); | 2109 | unconf = find_unconfirmed_client(&dc->clientid, true); |
2089 | conf = find_confirmed_client(&dc->clientid); | 2110 | conf = find_confirmed_client(&dc->clientid, true); |
2090 | 2111 | ||
2091 | if (conf) { | 2112 | if (conf) { |
2092 | clp = conf; | 2113 | clp = conf; |
@@ -2200,10 +2221,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2200 | copy_clid(new, conf); | 2221 | copy_clid(new, conf); |
2201 | else /* case 4 (new client) or cases 2, 3 (client reboot): */ | 2222 | else /* case 4 (new client) or cases 2, 3 (client reboot): */ |
2202 | gen_clid(new); | 2223 | gen_clid(new); |
2203 | /* | ||
2204 | * XXX: we should probably set this at creation time, and check | ||
2205 | * for consistent minorversion use throughout: | ||
2206 | */ | ||
2207 | new->cl_minorversion = 0; | 2224 | new->cl_minorversion = 0; |
2208 | gen_callback(new, setclid, rqstp); | 2225 | gen_callback(new, setclid, rqstp); |
2209 | add_to_unconfirmed(new, strhashval); | 2226 | add_to_unconfirmed(new, strhashval); |
@@ -2232,8 +2249,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
2232 | return nfserr_stale_clientid; | 2249 | return nfserr_stale_clientid; |
2233 | nfs4_lock_state(); | 2250 | nfs4_lock_state(); |
2234 | 2251 | ||
2235 | conf = find_confirmed_client(clid); | 2252 | conf = find_confirmed_client(clid, false); |
2236 | unconf = find_unconfirmed_client(clid); | 2253 | unconf = find_unconfirmed_client(clid, false); |
2237 | /* | 2254 | /* |
2238 | * We try hard to give out unique clientid's, so if we get an | 2255 | * We try hard to give out unique clientid's, so if we get an |
2239 | * attempt to confirm the same clientid with a different cred, | 2256 | * attempt to confirm the same clientid with a different cred, |
@@ -2262,10 +2279,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
2262 | unsigned int hash = clientstr_hashval(unconf->cl_recdir); | 2279 | unsigned int hash = clientstr_hashval(unconf->cl_recdir); |
2263 | 2280 | ||
2264 | conf = find_confirmed_client_by_str(unconf->cl_recdir, hash); | 2281 | conf = find_confirmed_client_by_str(unconf->cl_recdir, hash); |
2265 | if (conf) { | 2282 | if (conf) |
2266 | nfsd4_client_record_remove(conf); | ||
2267 | expire_client(conf); | 2283 | expire_client(conf); |
2268 | } | ||
2269 | move_to_confirmed(unconf); | 2284 | move_to_confirmed(unconf); |
2270 | nfsd4_probe_callback(unconf); | 2285 | nfsd4_probe_callback(unconf); |
2271 | } | 2286 | } |
@@ -2447,16 +2462,20 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, | |||
2447 | } | 2462 | } |
2448 | 2463 | ||
2449 | static struct nfs4_openowner * | 2464 | static struct nfs4_openowner * |
2450 | find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open) | 2465 | find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, bool sessions) |
2451 | { | 2466 | { |
2452 | struct nfs4_stateowner *so; | 2467 | struct nfs4_stateowner *so; |
2453 | struct nfs4_openowner *oo; | 2468 | struct nfs4_openowner *oo; |
2469 | struct nfs4_client *clp; | ||
2454 | 2470 | ||
2455 | list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { | 2471 | list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { |
2456 | if (!so->so_is_open_owner) | 2472 | if (!so->so_is_open_owner) |
2457 | continue; | 2473 | continue; |
2458 | if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { | 2474 | if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { |
2459 | oo = openowner(so); | 2475 | oo = openowner(so); |
2476 | clp = oo->oo_owner.so_client; | ||
2477 | if ((bool)clp->cl_minorversion != sessions) | ||
2478 | return NULL; | ||
2460 | renew_client(oo->oo_owner.so_client); | 2479 | renew_client(oo->oo_owner.so_client); |
2461 | return oo; | 2480 | return oo; |
2462 | } | 2481 | } |
@@ -2600,10 +2619,10 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, | |||
2600 | return nfserr_jukebox; | 2619 | return nfserr_jukebox; |
2601 | 2620 | ||
2602 | strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); | 2621 | strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); |
2603 | oo = find_openstateowner_str(strhashval, open); | 2622 | oo = find_openstateowner_str(strhashval, open, cstate->minorversion); |
2604 | open->op_openowner = oo; | 2623 | open->op_openowner = oo; |
2605 | if (!oo) { | 2624 | if (!oo) { |
2606 | clp = find_confirmed_client(clientid); | 2625 | clp = find_confirmed_client(clientid, cstate->minorversion); |
2607 | if (clp == NULL) | 2626 | if (clp == NULL) |
2608 | return nfserr_expired; | 2627 | return nfserr_expired; |
2609 | goto new_owner; | 2628 | goto new_owner; |
@@ -2705,11 +2724,6 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_st | |||
2705 | return nfs_ok; | 2724 | return nfs_ok; |
2706 | } | 2725 | } |
2707 | 2726 | ||
2708 | static void nfs4_free_stateid(struct nfs4_ol_stateid *s) | ||
2709 | { | ||
2710 | kmem_cache_free(stateid_slab, s); | ||
2711 | } | ||
2712 | |||
2713 | static inline int nfs4_access_to_access(u32 nfs4_access) | 2727 | static inline int nfs4_access_to_access(u32 nfs4_access) |
2714 | { | 2728 | { |
2715 | int flags = 0; | 2729 | int flags = 0; |
@@ -3087,7 +3101,7 @@ void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) | |||
3087 | if (open->op_file) | 3101 | if (open->op_file) |
3088 | nfsd4_free_file(open->op_file); | 3102 | nfsd4_free_file(open->op_file); |
3089 | if (open->op_stp) | 3103 | if (open->op_stp) |
3090 | nfs4_free_stateid(open->op_stp); | 3104 | free_generic_stateid(open->op_stp); |
3091 | } | 3105 | } |
3092 | 3106 | ||
3093 | __be32 | 3107 | __be32 |
@@ -3104,7 +3118,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3104 | status = nfserr_stale_clientid; | 3118 | status = nfserr_stale_clientid; |
3105 | if (STALE_CLIENTID(clid, nn)) | 3119 | if (STALE_CLIENTID(clid, nn)) |
3106 | goto out; | 3120 | goto out; |
3107 | clp = find_confirmed_client(clid); | 3121 | clp = find_confirmed_client(clid, cstate->minorversion); |
3108 | status = nfserr_expired; | 3122 | status = nfserr_expired; |
3109 | if (clp == NULL) { | 3123 | if (clp == NULL) { |
3110 | /* We assume the client took too long to RENEW. */ | 3124 | /* We assume the client took too long to RENEW. */ |
@@ -3180,7 +3194,6 @@ nfs4_laundromat(void) | |||
3180 | clp = list_entry(pos, struct nfs4_client, cl_lru); | 3194 | clp = list_entry(pos, struct nfs4_client, cl_lru); |
3181 | dprintk("NFSD: purging unused client (clientid %08x)\n", | 3195 | dprintk("NFSD: purging unused client (clientid %08x)\n", |
3182 | clp->cl_clientid.cl_id); | 3196 | clp->cl_clientid.cl_id); |
3183 | nfsd4_client_record_remove(clp); | ||
3184 | expire_client(clp); | 3197 | expire_client(clp); |
3185 | } | 3198 | } |
3186 | spin_lock(&recall_lock); | 3199 | spin_lock(&recall_lock); |
@@ -3372,7 +3385,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) | |||
3372 | return nfs_ok; | 3385 | return nfs_ok; |
3373 | } | 3386 | } |
3374 | 3387 | ||
3375 | static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s) | 3388 | static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, bool sessions) |
3376 | { | 3389 | { |
3377 | struct nfs4_client *cl; | 3390 | struct nfs4_client *cl; |
3378 | struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); | 3391 | struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); |
@@ -3381,7 +3394,7 @@ static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, s | |||
3381 | return nfserr_bad_stateid; | 3394 | return nfserr_bad_stateid; |
3382 | if (STALE_STATEID(stateid, nn)) | 3395 | if (STALE_STATEID(stateid, nn)) |
3383 | return nfserr_stale_stateid; | 3396 | return nfserr_stale_stateid; |
3384 | cl = find_confirmed_client(&stateid->si_opaque.so_clid); | 3397 | cl = find_confirmed_client(&stateid->si_opaque.so_clid, sessions); |
3385 | if (!cl) | 3398 | if (!cl) |
3386 | return nfserr_expired; | 3399 | return nfserr_expired; |
3387 | *s = find_stateid_by_type(cl, stateid, typemask); | 3400 | *s = find_stateid_by_type(cl, stateid, typemask); |
@@ -3414,7 +3427,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, | |||
3414 | if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) | 3427 | if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) |
3415 | return check_special_stateids(net, current_fh, stateid, flags); | 3428 | return check_special_stateids(net, current_fh, stateid, flags); |
3416 | 3429 | ||
3417 | status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s); | 3430 | status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, cstate->minorversion); |
3418 | if (status) | 3431 | if (status) |
3419 | return status; | 3432 | return status; |
3420 | status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); | 3433 | status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); |
@@ -3564,7 +3577,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, | |||
3564 | seqid, STATEID_VAL(stateid)); | 3577 | seqid, STATEID_VAL(stateid)); |
3565 | 3578 | ||
3566 | *stpp = NULL; | 3579 | *stpp = NULL; |
3567 | status = nfsd4_lookup_stateid(stateid, typemask, &s); | 3580 | status = nfsd4_lookup_stateid(stateid, typemask, &s, cstate->minorversion); |
3568 | if (status) | 3581 | if (status) |
3569 | return status; | 3582 | return status; |
3570 | *stpp = openlockstateid(s); | 3583 | *stpp = openlockstateid(s); |
@@ -3765,6 +3778,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3765 | memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); | 3778 | memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); |
3766 | 3779 | ||
3767 | nfsd4_close_open_stateid(stp); | 3780 | nfsd4_close_open_stateid(stp); |
3781 | release_last_closed_stateid(oo); | ||
3768 | oo->oo_last_closed_stid = stp; | 3782 | oo->oo_last_closed_stid = stp; |
3769 | 3783 | ||
3770 | if (list_empty(&oo->oo_owner.so_stateids)) { | 3784 | if (list_empty(&oo->oo_owner.so_stateids)) { |
@@ -3801,7 +3815,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3801 | inode = cstate->current_fh.fh_dentry->d_inode; | 3815 | inode = cstate->current_fh.fh_dentry->d_inode; |
3802 | 3816 | ||
3803 | nfs4_lock_state(); | 3817 | nfs4_lock_state(); |
3804 | status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s); | 3818 | status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s, cstate->minorversion); |
3805 | if (status) | 3819 | if (status) |
3806 | goto out; | 3820 | goto out; |
3807 | dp = delegstateid(s); | 3821 | dp = delegstateid(s); |
@@ -4045,8 +4059,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4045 | struct nfs4_lockowner *lock_sop = NULL; | 4059 | struct nfs4_lockowner *lock_sop = NULL; |
4046 | struct nfs4_ol_stateid *lock_stp; | 4060 | struct nfs4_ol_stateid *lock_stp; |
4047 | struct file *filp = NULL; | 4061 | struct file *filp = NULL; |
4048 | struct file_lock file_lock; | 4062 | struct file_lock *file_lock = NULL; |
4049 | struct file_lock conflock; | 4063 | struct file_lock *conflock = NULL; |
4050 | __be32 status = 0; | 4064 | __be32 status = 0; |
4051 | bool new_state = false; | 4065 | bool new_state = false; |
4052 | int lkflg; | 4066 | int lkflg; |
@@ -4116,21 +4130,28 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4116 | if (!locks_in_grace(SVC_NET(rqstp)) && lock->lk_reclaim) | 4130 | if (!locks_in_grace(SVC_NET(rqstp)) && lock->lk_reclaim) |
4117 | goto out; | 4131 | goto out; |
4118 | 4132 | ||
4119 | locks_init_lock(&file_lock); | 4133 | file_lock = locks_alloc_lock(); |
4134 | if (!file_lock) { | ||
4135 | dprintk("NFSD: %s: unable to allocate lock!\n", __func__); | ||
4136 | status = nfserr_jukebox; | ||
4137 | goto out; | ||
4138 | } | ||
4139 | |||
4140 | locks_init_lock(file_lock); | ||
4120 | switch (lock->lk_type) { | 4141 | switch (lock->lk_type) { |
4121 | case NFS4_READ_LT: | 4142 | case NFS4_READ_LT: |
4122 | case NFS4_READW_LT: | 4143 | case NFS4_READW_LT: |
4123 | filp = find_readable_file(lock_stp->st_file); | 4144 | filp = find_readable_file(lock_stp->st_file); |
4124 | if (filp) | 4145 | if (filp) |
4125 | get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); | 4146 | get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); |
4126 | file_lock.fl_type = F_RDLCK; | 4147 | file_lock->fl_type = F_RDLCK; |
4127 | break; | 4148 | break; |
4128 | case NFS4_WRITE_LT: | 4149 | case NFS4_WRITE_LT: |
4129 | case NFS4_WRITEW_LT: | 4150 | case NFS4_WRITEW_LT: |
4130 | filp = find_writeable_file(lock_stp->st_file); | 4151 | filp = find_writeable_file(lock_stp->st_file); |
4131 | if (filp) | 4152 | if (filp) |
4132 | get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); | 4153 | get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); |
4133 | file_lock.fl_type = F_WRLCK; | 4154 | file_lock->fl_type = F_WRLCK; |
4134 | break; | 4155 | break; |
4135 | default: | 4156 | default: |
4136 | status = nfserr_inval; | 4157 | status = nfserr_inval; |
@@ -4140,22 +4161,23 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4140 | status = nfserr_openmode; | 4161 | status = nfserr_openmode; |
4141 | goto out; | 4162 | goto out; |
4142 | } | 4163 | } |
4143 | file_lock.fl_owner = (fl_owner_t)lock_sop; | 4164 | file_lock->fl_owner = (fl_owner_t)lock_sop; |
4144 | file_lock.fl_pid = current->tgid; | 4165 | file_lock->fl_pid = current->tgid; |
4145 | file_lock.fl_file = filp; | 4166 | file_lock->fl_file = filp; |
4146 | file_lock.fl_flags = FL_POSIX; | 4167 | file_lock->fl_flags = FL_POSIX; |
4147 | file_lock.fl_lmops = &nfsd_posix_mng_ops; | 4168 | file_lock->fl_lmops = &nfsd_posix_mng_ops; |
4148 | 4169 | file_lock->fl_start = lock->lk_offset; | |
4149 | file_lock.fl_start = lock->lk_offset; | 4170 | file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length); |
4150 | file_lock.fl_end = last_byte_offset(lock->lk_offset, lock->lk_length); | 4171 | nfs4_transform_lock_offset(file_lock); |
4151 | nfs4_transform_lock_offset(&file_lock); | 4172 | |
4152 | 4173 | conflock = locks_alloc_lock(); | |
4153 | /* | 4174 | if (!conflock) { |
4154 | * Try to lock the file in the VFS. | 4175 | dprintk("NFSD: %s: unable to allocate lock!\n", __func__); |
4155 | * Note: locks.c uses the BKL to protect the inode's lock list. | 4176 | status = nfserr_jukebox; |
4156 | */ | 4177 | goto out; |
4178 | } | ||
4157 | 4179 | ||
4158 | err = vfs_lock_file(filp, F_SETLK, &file_lock, &conflock); | 4180 | err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); |
4159 | switch (-err) { | 4181 | switch (-err) { |
4160 | case 0: /* success! */ | 4182 | case 0: /* success! */ |
4161 | update_stateid(&lock_stp->st_stid.sc_stateid); | 4183 | update_stateid(&lock_stp->st_stid.sc_stateid); |
@@ -4166,7 +4188,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4166 | case (EAGAIN): /* conflock holds conflicting lock */ | 4188 | case (EAGAIN): /* conflock holds conflicting lock */ |
4167 | status = nfserr_denied; | 4189 | status = nfserr_denied; |
4168 | dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); | 4190 | dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); |
4169 | nfs4_set_lock_denied(&conflock, &lock->lk_denied); | 4191 | nfs4_set_lock_denied(conflock, &lock->lk_denied); |
4170 | break; | 4192 | break; |
4171 | case (EDEADLK): | 4193 | case (EDEADLK): |
4172 | status = nfserr_deadlock; | 4194 | status = nfserr_deadlock; |
@@ -4181,6 +4203,10 @@ out: | |||
4181 | release_lockowner(lock_sop); | 4203 | release_lockowner(lock_sop); |
4182 | if (!cstate->replay_owner) | 4204 | if (!cstate->replay_owner) |
4183 | nfs4_unlock_state(); | 4205 | nfs4_unlock_state(); |
4206 | if (file_lock) | ||
4207 | locks_free_lock(file_lock); | ||
4208 | if (conflock) | ||
4209 | locks_free_lock(conflock); | ||
4184 | return status; | 4210 | return status; |
4185 | } | 4211 | } |
4186 | 4212 | ||
@@ -4209,7 +4235,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4209 | struct nfsd4_lockt *lockt) | 4235 | struct nfsd4_lockt *lockt) |
4210 | { | 4236 | { |
4211 | struct inode *inode; | 4237 | struct inode *inode; |
4212 | struct file_lock file_lock; | 4238 | struct file_lock *file_lock = NULL; |
4213 | struct nfs4_lockowner *lo; | 4239 | struct nfs4_lockowner *lo; |
4214 | __be32 status; | 4240 | __be32 status; |
4215 | struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); | 4241 | struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); |
@@ -4230,15 +4256,21 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4230 | goto out; | 4256 | goto out; |
4231 | 4257 | ||
4232 | inode = cstate->current_fh.fh_dentry->d_inode; | 4258 | inode = cstate->current_fh.fh_dentry->d_inode; |
4233 | locks_init_lock(&file_lock); | 4259 | file_lock = locks_alloc_lock(); |
4260 | if (!file_lock) { | ||
4261 | dprintk("NFSD: %s: unable to allocate lock!\n", __func__); | ||
4262 | status = nfserr_jukebox; | ||
4263 | goto out; | ||
4264 | } | ||
4265 | locks_init_lock(file_lock); | ||
4234 | switch (lockt->lt_type) { | 4266 | switch (lockt->lt_type) { |
4235 | case NFS4_READ_LT: | 4267 | case NFS4_READ_LT: |
4236 | case NFS4_READW_LT: | 4268 | case NFS4_READW_LT: |
4237 | file_lock.fl_type = F_RDLCK; | 4269 | file_lock->fl_type = F_RDLCK; |
4238 | break; | 4270 | break; |
4239 | case NFS4_WRITE_LT: | 4271 | case NFS4_WRITE_LT: |
4240 | case NFS4_WRITEW_LT: | 4272 | case NFS4_WRITEW_LT: |
4241 | file_lock.fl_type = F_WRLCK; | 4273 | file_lock->fl_type = F_WRLCK; |
4242 | break; | 4274 | break; |
4243 | default: | 4275 | default: |
4244 | dprintk("NFSD: nfs4_lockt: bad lock type!\n"); | 4276 | dprintk("NFSD: nfs4_lockt: bad lock type!\n"); |
@@ -4248,25 +4280,27 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4248 | 4280 | ||
4249 | lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner); | 4281 | lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner); |
4250 | if (lo) | 4282 | if (lo) |
4251 | file_lock.fl_owner = (fl_owner_t)lo; | 4283 | file_lock->fl_owner = (fl_owner_t)lo; |
4252 | file_lock.fl_pid = current->tgid; | 4284 | file_lock->fl_pid = current->tgid; |
4253 | file_lock.fl_flags = FL_POSIX; | 4285 | file_lock->fl_flags = FL_POSIX; |
4254 | 4286 | ||
4255 | file_lock.fl_start = lockt->lt_offset; | 4287 | file_lock->fl_start = lockt->lt_offset; |
4256 | file_lock.fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length); | 4288 | file_lock->fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length); |
4257 | 4289 | ||
4258 | nfs4_transform_lock_offset(&file_lock); | 4290 | nfs4_transform_lock_offset(file_lock); |
4259 | 4291 | ||
4260 | status = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock); | 4292 | status = nfsd_test_lock(rqstp, &cstate->current_fh, file_lock); |
4261 | if (status) | 4293 | if (status) |
4262 | goto out; | 4294 | goto out; |
4263 | 4295 | ||
4264 | if (file_lock.fl_type != F_UNLCK) { | 4296 | if (file_lock->fl_type != F_UNLCK) { |
4265 | status = nfserr_denied; | 4297 | status = nfserr_denied; |
4266 | nfs4_set_lock_denied(&file_lock, &lockt->lt_denied); | 4298 | nfs4_set_lock_denied(file_lock, &lockt->lt_denied); |
4267 | } | 4299 | } |
4268 | out: | 4300 | out: |
4269 | nfs4_unlock_state(); | 4301 | nfs4_unlock_state(); |
4302 | if (file_lock) | ||
4303 | locks_free_lock(file_lock); | ||
4270 | return status; | 4304 | return status; |
4271 | } | 4305 | } |
4272 | 4306 | ||
@@ -4276,7 +4310,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4276 | { | 4310 | { |
4277 | struct nfs4_ol_stateid *stp; | 4311 | struct nfs4_ol_stateid *stp; |
4278 | struct file *filp = NULL; | 4312 | struct file *filp = NULL; |
4279 | struct file_lock file_lock; | 4313 | struct file_lock *file_lock = NULL; |
4280 | __be32 status; | 4314 | __be32 status; |
4281 | int err; | 4315 | int err; |
4282 | 4316 | ||
@@ -4298,23 +4332,29 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4298 | status = nfserr_lock_range; | 4332 | status = nfserr_lock_range; |
4299 | goto out; | 4333 | goto out; |
4300 | } | 4334 | } |
4301 | BUG_ON(!filp); | 4335 | file_lock = locks_alloc_lock(); |
4302 | locks_init_lock(&file_lock); | 4336 | if (!file_lock) { |
4303 | file_lock.fl_type = F_UNLCK; | 4337 | dprintk("NFSD: %s: unable to allocate lock!\n", __func__); |
4304 | file_lock.fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); | 4338 | status = nfserr_jukebox; |
4305 | file_lock.fl_pid = current->tgid; | 4339 | goto out; |
4306 | file_lock.fl_file = filp; | 4340 | } |
4307 | file_lock.fl_flags = FL_POSIX; | 4341 | locks_init_lock(file_lock); |
4308 | file_lock.fl_lmops = &nfsd_posix_mng_ops; | 4342 | file_lock->fl_type = F_UNLCK; |
4309 | file_lock.fl_start = locku->lu_offset; | 4343 | file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); |
4310 | 4344 | file_lock->fl_pid = current->tgid; | |
4311 | file_lock.fl_end = last_byte_offset(locku->lu_offset, locku->lu_length); | 4345 | file_lock->fl_file = filp; |
4312 | nfs4_transform_lock_offset(&file_lock); | 4346 | file_lock->fl_flags = FL_POSIX; |
4347 | file_lock->fl_lmops = &nfsd_posix_mng_ops; | ||
4348 | file_lock->fl_start = locku->lu_offset; | ||
4349 | |||
4350 | file_lock->fl_end = last_byte_offset(locku->lu_offset, | ||
4351 | locku->lu_length); | ||
4352 | nfs4_transform_lock_offset(file_lock); | ||
4313 | 4353 | ||
4314 | /* | 4354 | /* |
4315 | * Try to unlock the file in the VFS. | 4355 | * Try to unlock the file in the VFS. |
4316 | */ | 4356 | */ |
4317 | err = vfs_lock_file(filp, F_SETLK, &file_lock, NULL); | 4357 | err = vfs_lock_file(filp, F_SETLK, file_lock, NULL); |
4318 | if (err) { | 4358 | if (err) { |
4319 | dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n"); | 4359 | dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n"); |
4320 | goto out_nfserr; | 4360 | goto out_nfserr; |
@@ -4328,6 +4368,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4328 | out: | 4368 | out: |
4329 | if (!cstate->replay_owner) | 4369 | if (!cstate->replay_owner) |
4330 | nfs4_unlock_state(); | 4370 | nfs4_unlock_state(); |
4371 | if (file_lock) | ||
4372 | locks_free_lock(file_lock); | ||
4331 | return status; | 4373 | return status; |
4332 | 4374 | ||
4333 | out_nfserr: | 4375 | out_nfserr: |
@@ -4501,12 +4543,12 @@ nfsd4_find_reclaim_client(struct nfs4_client *clp) | |||
4501 | * Called from OPEN. Look for clientid in reclaim list. | 4543 | * Called from OPEN. Look for clientid in reclaim list. |
4502 | */ | 4544 | */ |
4503 | __be32 | 4545 | __be32 |
4504 | nfs4_check_open_reclaim(clientid_t *clid) | 4546 | nfs4_check_open_reclaim(clientid_t *clid, bool sessions) |
4505 | { | 4547 | { |
4506 | struct nfs4_client *clp; | 4548 | struct nfs4_client *clp; |
4507 | 4549 | ||
4508 | /* find clientid in conf_id_hashtbl */ | 4550 | /* find clientid in conf_id_hashtbl */ |
4509 | clp = find_confirmed_client(clid); | 4551 | clp = find_confirmed_client(clid, sessions); |
4510 | if (clp == NULL) | 4552 | if (clp == NULL) |
4511 | return nfserr_reclaim_bad; | 4553 | return nfserr_reclaim_bad; |
4512 | 4554 | ||
@@ -4522,7 +4564,6 @@ void nfsd_forget_clients(u64 num) | |||
4522 | 4564 | ||
4523 | nfs4_lock_state(); | 4565 | nfs4_lock_state(); |
4524 | list_for_each_entry_safe(clp, next, &client_lru, cl_lru) { | 4566 | list_for_each_entry_safe(clp, next, &client_lru, cl_lru) { |
4525 | nfsd4_client_record_remove(clp); | ||
4526 | expire_client(clp); | 4567 | expire_client(clp); |
4527 | if (++count == num) | 4568 | if (++count == num) |
4528 | break; | 4569 | break; |
@@ -4582,7 +4623,7 @@ void nfsd_forget_openowners(u64 num) | |||
4582 | printk(KERN_INFO "NFSD: Forgot %d open owners", count); | 4623 | printk(KERN_INFO "NFSD: Forgot %d open owners", count); |
4583 | } | 4624 | } |
4584 | 4625 | ||
4585 | int nfsd_process_n_delegations(u64 num, struct list_head *list) | 4626 | static int nfsd_process_n_delegations(u64 num, struct list_head *list) |
4586 | { | 4627 | { |
4587 | int i, count = 0; | 4628 | int i, count = 0; |
4588 | struct nfs4_file *fp, *fnext; | 4629 | struct nfs4_file *fp, *fnext; |
@@ -4747,11 +4788,11 @@ __nfs4_state_shutdown(void) | |||
4747 | for (i = 0; i < CLIENT_HASH_SIZE; i++) { | 4788 | for (i = 0; i < CLIENT_HASH_SIZE; i++) { |
4748 | while (!list_empty(&conf_id_hashtbl[i])) { | 4789 | while (!list_empty(&conf_id_hashtbl[i])) { |
4749 | clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); | 4790 | clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); |
4750 | expire_client(clp); | 4791 | destroy_client(clp); |
4751 | } | 4792 | } |
4752 | while (!list_empty(&unconf_str_hashtbl[i])) { | 4793 | while (!list_empty(&unconf_str_hashtbl[i])) { |
4753 | clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash); | 4794 | clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash); |
4754 | expire_client(clp); | 4795 | destroy_client(clp); |
4755 | } | 4796 | } |
4756 | } | 4797 | } |
4757 | INIT_LIST_HEAD(&reaplist); | 4798 | INIT_LIST_HEAD(&reaplist); |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 6322df36031f..fd548d155088 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -2659,7 +2659,7 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, | |||
2659 | RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 8); | 2659 | RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 8); |
2660 | WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); | 2660 | WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); |
2661 | WRITE32(bcts->dir); | 2661 | WRITE32(bcts->dir); |
2662 | /* XXX: ? */ | 2662 | /* Sorry, we do not yet support RDMA over 4.1: */ |
2663 | WRITE32(0); | 2663 | WRITE32(0); |
2664 | ADJUST_ARGS(); | 2664 | ADJUST_ARGS(); |
2665 | } | 2665 | } |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index fa49cff5ee65..dab350dfc376 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -406,7 +406,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) | |||
406 | return rv; | 406 | return rv; |
407 | if (newthreads < 0) | 407 | if (newthreads < 0) |
408 | return -EINVAL; | 408 | return -EINVAL; |
409 | rv = nfsd_svc(NFS_PORT, newthreads); | 409 | rv = nfsd_svc(newthreads); |
410 | if (rv < 0) | 410 | if (rv < 0) |
411 | return rv; | 411 | return rv; |
412 | } else | 412 | } else |
@@ -683,25 +683,6 @@ static ssize_t __write_ports_addfd(char *buf) | |||
683 | } | 683 | } |
684 | 684 | ||
685 | /* | 685 | /* |
686 | * A '-' followed by the 'name' of a socket means we close the socket. | ||
687 | */ | ||
688 | static ssize_t __write_ports_delfd(char *buf) | ||
689 | { | ||
690 | char *toclose; | ||
691 | int len = 0; | ||
692 | |||
693 | toclose = kstrdup(buf + 1, GFP_KERNEL); | ||
694 | if (toclose == NULL) | ||
695 | return -ENOMEM; | ||
696 | |||
697 | if (nfsd_serv != NULL) | ||
698 | len = svc_sock_names(nfsd_serv, buf, | ||
699 | SIMPLE_TRANSACTION_LIMIT, toclose); | ||
700 | kfree(toclose); | ||
701 | return len; | ||
702 | } | ||
703 | |||
704 | /* | ||
705 | * A transport listener is added by writing it's transport name and | 686 | * A transport listener is added by writing it's transport name and |
706 | * a port number. | 687 | * a port number. |
707 | */ | 688 | */ |
@@ -712,7 +693,7 @@ static ssize_t __write_ports_addxprt(char *buf) | |||
712 | int port, err; | 693 | int port, err; |
713 | struct net *net = &init_net; | 694 | struct net *net = &init_net; |
714 | 695 | ||
715 | if (sscanf(buf, "%15s %4u", transport, &port) != 2) | 696 | if (sscanf(buf, "%15s %5u", transport, &port) != 2) |
716 | return -EINVAL; | 697 | return -EINVAL; |
717 | 698 | ||
718 | if (port < 1 || port > USHRT_MAX) | 699 | if (port < 1 || port > USHRT_MAX) |
@@ -746,31 +727,6 @@ out_err: | |||
746 | return err; | 727 | return err; |
747 | } | 728 | } |
748 | 729 | ||
749 | /* | ||
750 | * A transport listener is removed by writing a "-", it's transport | ||
751 | * name, and it's port number. | ||
752 | */ | ||
753 | static ssize_t __write_ports_delxprt(char *buf) | ||
754 | { | ||
755 | struct svc_xprt *xprt; | ||
756 | char transport[16]; | ||
757 | int port; | ||
758 | |||
759 | if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2) | ||
760 | return -EINVAL; | ||
761 | |||
762 | if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL) | ||
763 | return -EINVAL; | ||
764 | |||
765 | xprt = svc_find_xprt(nfsd_serv, transport, &init_net, AF_UNSPEC, port); | ||
766 | if (xprt == NULL) | ||
767 | return -ENOTCONN; | ||
768 | |||
769 | svc_close_xprt(xprt); | ||
770 | svc_xprt_put(xprt); | ||
771 | return 0; | ||
772 | } | ||
773 | |||
774 | static ssize_t __write_ports(struct file *file, char *buf, size_t size) | 730 | static ssize_t __write_ports(struct file *file, char *buf, size_t size) |
775 | { | 731 | { |
776 | if (size == 0) | 732 | if (size == 0) |
@@ -779,15 +735,9 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) | |||
779 | if (isdigit(buf[0])) | 735 | if (isdigit(buf[0])) |
780 | return __write_ports_addfd(buf); | 736 | return __write_ports_addfd(buf); |
781 | 737 | ||
782 | if (buf[0] == '-' && isdigit(buf[1])) | ||
783 | return __write_ports_delfd(buf); | ||
784 | |||
785 | if (isalpha(buf[0])) | 738 | if (isalpha(buf[0])) |
786 | return __write_ports_addxprt(buf); | 739 | return __write_ports_addxprt(buf); |
787 | 740 | ||
788 | if (buf[0] == '-' && isalpha(buf[1])) | ||
789 | return __write_ports_delxprt(buf); | ||
790 | |||
791 | return -EINVAL; | 741 | return -EINVAL; |
792 | } | 742 | } |
793 | 743 | ||
@@ -825,21 +775,6 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) | |||
825 | * OR | 775 | * OR |
826 | * | 776 | * |
827 | * Input: | 777 | * Input: |
828 | * buf: C string containing a "-" followed | ||
829 | * by an integer value representing a | ||
830 | * previously passed in socket file | ||
831 | * descriptor | ||
832 | * size: non-zero length of C string in @buf | ||
833 | * Output: | ||
834 | * On success: NFS service no longer listens on that socket; | ||
835 | * passed-in buffer filled with a '\n'-terminated C | ||
836 | * string containing a unique name of the listener; | ||
837 | * return code is the size in bytes of the string | ||
838 | * On error: return code is a negative errno value | ||
839 | * | ||
840 | * OR | ||
841 | * | ||
842 | * Input: | ||
843 | * buf: C string containing a transport | 778 | * buf: C string containing a transport |
844 | * name and an unsigned integer value | 779 | * name and an unsigned integer value |
845 | * representing the port to listen on, | 780 | * representing the port to listen on, |
@@ -848,19 +783,6 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) | |||
848 | * Output: | 783 | * Output: |
849 | * On success: returns zero; NFS service is started | 784 | * On success: returns zero; NFS service is started |
850 | * On error: return code is a negative errno value | 785 | * On error: return code is a negative errno value |
851 | * | ||
852 | * OR | ||
853 | * | ||
854 | * Input: | ||
855 | * buf: C string containing a "-" followed | ||
856 | * by a transport name and an unsigned | ||
857 | * integer value representing the port | ||
858 | * to listen on, separated by whitespace | ||
859 | * size: non-zero length of C string in @buf | ||
860 | * Output: | ||
861 | * On success: returns zero; NFS service no longer listens | ||
862 | * on that transport | ||
863 | * On error: return code is a negative errno value | ||
864 | */ | 786 | */ |
865 | static ssize_t write_ports(struct file *file, char *buf, size_t size) | 787 | static ssize_t write_ports(struct file *file, char *buf, size_t size) |
866 | { | 788 | { |
@@ -1008,8 +930,6 @@ static ssize_t write_gracetime(struct file *file, char *buf, size_t size) | |||
1008 | return nfsd4_write_time(file, buf, size, &nfsd4_grace); | 930 | return nfsd4_write_time(file, buf, size, &nfsd4_grace); |
1009 | } | 931 | } |
1010 | 932 | ||
1011 | extern char *nfs4_recoverydir(void); | ||
1012 | |||
1013 | static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) | 933 | static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) |
1014 | { | 934 | { |
1015 | char *mesg = buf; | 935 | char *mesg = buf; |
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 2244222368ab..80d5ce40aadb 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h | |||
@@ -65,7 +65,7 @@ extern const struct seq_operations nfs_exports_op; | |||
65 | /* | 65 | /* |
66 | * Function prototypes. | 66 | * Function prototypes. |
67 | */ | 67 | */ |
68 | int nfsd_svc(unsigned short port, int nrservs); | 68 | int nfsd_svc(int nrservs); |
69 | int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); | 69 | int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); |
70 | 70 | ||
71 | int nfsd_nrthreads(void); | 71 | int nfsd_nrthreads(void); |
@@ -124,6 +124,7 @@ int nfs4_state_start(void); | |||
124 | void nfs4_state_shutdown(void); | 124 | void nfs4_state_shutdown(void); |
125 | void nfs4_reset_lease(time_t leasetime); | 125 | void nfs4_reset_lease(time_t leasetime); |
126 | int nfs4_reset_recoverydir(char *recdir); | 126 | int nfs4_reset_recoverydir(char *recdir); |
127 | char * nfs4_recoverydir(void); | ||
127 | #else | 128 | #else |
128 | static inline void nfs4_state_init(void) { } | 129 | static inline void nfs4_state_init(void) { } |
129 | static inline int nfsd4_init_slabs(void) { return 0; } | 130 | static inline int nfsd4_init_slabs(void) { return 0; } |
@@ -132,6 +133,7 @@ static inline int nfs4_state_start(void) { return 0; } | |||
132 | static inline void nfs4_state_shutdown(void) { } | 133 | static inline void nfs4_state_shutdown(void) { } |
133 | static inline void nfs4_reset_lease(time_t leasetime) { } | 134 | static inline void nfs4_reset_lease(time_t leasetime) { } |
134 | static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } | 135 | static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } |
136 | static inline char * nfs4_recoverydir(void) {return NULL; } | ||
135 | #endif | 137 | #endif |
136 | 138 | ||
137 | /* | 139 | /* |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 240473cb708f..2013aa001dab 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -183,18 +183,18 @@ int nfsd_nrthreads(void) | |||
183 | return rv; | 183 | return rv; |
184 | } | 184 | } |
185 | 185 | ||
186 | static int nfsd_init_socks(int port) | 186 | static int nfsd_init_socks(void) |
187 | { | 187 | { |
188 | int error; | 188 | int error; |
189 | if (!list_empty(&nfsd_serv->sv_permsocks)) | 189 | if (!list_empty(&nfsd_serv->sv_permsocks)) |
190 | return 0; | 190 | return 0; |
191 | 191 | ||
192 | error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, port, | 192 | error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, NFS_PORT, |
193 | SVC_SOCK_DEFAULTS); | 193 | SVC_SOCK_DEFAULTS); |
194 | if (error < 0) | 194 | if (error < 0) |
195 | return error; | 195 | return error; |
196 | 196 | ||
197 | error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, port, | 197 | error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, NFS_PORT, |
198 | SVC_SOCK_DEFAULTS); | 198 | SVC_SOCK_DEFAULTS); |
199 | if (error < 0) | 199 | if (error < 0) |
200 | return error; | 200 | return error; |
@@ -204,7 +204,7 @@ static int nfsd_init_socks(int port) | |||
204 | 204 | ||
205 | static bool nfsd_up = false; | 205 | static bool nfsd_up = false; |
206 | 206 | ||
207 | static int nfsd_startup(unsigned short port, int nrservs) | 207 | static int nfsd_startup(int nrservs) |
208 | { | 208 | { |
209 | int ret; | 209 | int ret; |
210 | 210 | ||
@@ -218,7 +218,7 @@ static int nfsd_startup(unsigned short port, int nrservs) | |||
218 | ret = nfsd_racache_init(2*nrservs); | 218 | ret = nfsd_racache_init(2*nrservs); |
219 | if (ret) | 219 | if (ret) |
220 | return ret; | 220 | return ret; |
221 | ret = nfsd_init_socks(port); | 221 | ret = nfsd_init_socks(); |
222 | if (ret) | 222 | if (ret) |
223 | goto out_racache; | 223 | goto out_racache; |
224 | ret = lockd_up(&init_net); | 224 | ret = lockd_up(&init_net); |
@@ -436,7 +436,7 @@ int nfsd_set_nrthreads(int n, int *nthreads) | |||
436 | * this is the first time nrservs is nonzero. | 436 | * this is the first time nrservs is nonzero. |
437 | */ | 437 | */ |
438 | int | 438 | int |
439 | nfsd_svc(unsigned short port, int nrservs) | 439 | nfsd_svc(int nrservs) |
440 | { | 440 | { |
441 | int error; | 441 | int error; |
442 | bool nfsd_up_before; | 442 | bool nfsd_up_before; |
@@ -458,7 +458,7 @@ nfsd_svc(unsigned short port, int nrservs) | |||
458 | 458 | ||
459 | nfsd_up_before = nfsd_up; | 459 | nfsd_up_before = nfsd_up; |
460 | 460 | ||
461 | error = nfsd_startup(port, nrservs); | 461 | error = nfsd_startup(nrservs); |
462 | if (error) | 462 | if (error) |
463 | goto out_destroy; | 463 | goto out_destroy; |
464 | error = svc_set_num_threads(nfsd_serv, NULL, nrservs); | 464 | error = svc_set_num_threads(nfsd_serv, NULL, nrservs); |
@@ -487,7 +487,7 @@ static int | |||
487 | nfsd(void *vrqstp) | 487 | nfsd(void *vrqstp) |
488 | { | 488 | { |
489 | struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; | 489 | struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; |
490 | int err, preverr = 0; | 490 | int err; |
491 | 491 | ||
492 | /* Lock module and set up kernel thread */ | 492 | /* Lock module and set up kernel thread */ |
493 | mutex_lock(&nfsd_mutex); | 493 | mutex_lock(&nfsd_mutex); |
@@ -534,16 +534,6 @@ nfsd(void *vrqstp) | |||
534 | ; | 534 | ; |
535 | if (err == -EINTR) | 535 | if (err == -EINTR) |
536 | break; | 536 | break; |
537 | else if (err < 0) { | ||
538 | if (err != preverr) { | ||
539 | printk(KERN_WARNING "%s: unexpected error " | ||
540 | "from svc_recv (%d)\n", __func__, -err); | ||
541 | preverr = err; | ||
542 | } | ||
543 | schedule_timeout_uninterruptible(HZ); | ||
544 | continue; | ||
545 | } | ||
546 | |||
547 | validate_process_creds(); | 537 | validate_process_creds(); |
548 | svc_process(rqstp); | 538 | svc_process(rqstp); |
549 | validate_process_creds(); | 539 | validate_process_creds(); |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 22bd0a66c356..e036894bce57 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -373,11 +373,7 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so) | |||
373 | return container_of(so, struct nfs4_lockowner, lo_owner); | 373 | return container_of(so, struct nfs4_lockowner, lo_owner); |
374 | } | 374 | } |
375 | 375 | ||
376 | /* | 376 | /* nfs4_file: a file opened by some number of (open) nfs4_stateowners. */ |
377 | * nfs4_file: a file opened by some number of (open) nfs4_stateowners. | ||
378 | * o fi_perfile list is used to search for conflicting | ||
379 | * share_acces, share_deny on the file. | ||
380 | */ | ||
381 | struct nfs4_file { | 377 | struct nfs4_file { |
382 | atomic_t fi_ref; | 378 | atomic_t fi_ref; |
383 | struct list_head fi_hash; /* hash by "struct inode *" */ | 379 | struct list_head fi_hash; /* hash by "struct inode *" */ |
@@ -459,7 +455,7 @@ extern void nfs4_unlock_state(void); | |||
459 | extern int nfs4_in_grace(void); | 455 | extern int nfs4_in_grace(void); |
460 | extern void nfs4_release_reclaim(void); | 456 | extern void nfs4_release_reclaim(void); |
461 | extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct nfs4_client *crp); | 457 | extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct nfs4_client *crp); |
462 | extern __be32 nfs4_check_open_reclaim(clientid_t *clid); | 458 | extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions); |
463 | extern void nfs4_free_openowner(struct nfs4_openowner *); | 459 | extern void nfs4_free_openowner(struct nfs4_openowner *); |
464 | extern void nfs4_free_lockowner(struct nfs4_lockowner *); | 460 | extern void nfs4_free_lockowner(struct nfs4_lockowner *); |
465 | extern int set_callback_cred(void); | 461 | extern int set_callback_cred(void); |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 3f67b8e12251..c120b48ec305 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -1581,7 +1581,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) | |||
1581 | */ | 1581 | */ |
1582 | 1582 | ||
1583 | oldfs = get_fs(); set_fs(KERNEL_DS); | 1583 | oldfs = get_fs(); set_fs(KERNEL_DS); |
1584 | host_err = inode->i_op->readlink(path.dentry, buf, *lenp); | 1584 | host_err = inode->i_op->readlink(path.dentry, (char __user *)buf, *lenp); |
1585 | set_fs(oldfs); | 1585 | set_fs(oldfs); |
1586 | 1586 | ||
1587 | if (host_err < 0) | 1587 | if (host_err < 0) |
@@ -478,7 +478,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) | |||
478 | 478 | ||
479 | file = fget(fd); | 479 | file = fget(fd); |
480 | if (file) { | 480 | if (file) { |
481 | audit_inode(NULL, file->f_path.dentry); | 481 | audit_inode(NULL, file->f_path.dentry, 0); |
482 | err = chmod_common(&file->f_path, mode); | 482 | err = chmod_common(&file->f_path, mode); |
483 | fput(file); | 483 | fput(file); |
484 | } | 484 | } |
@@ -588,7 +588,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) | |||
588 | error = mnt_want_write_file(f.file); | 588 | error = mnt_want_write_file(f.file); |
589 | if (error) | 589 | if (error) |
590 | goto out_fput; | 590 | goto out_fput; |
591 | audit_inode(NULL, f.file->f_path.dentry); | 591 | audit_inode(NULL, f.file->f_path.dentry, 0); |
592 | error = chown_common(&f.file->f_path, user, group); | 592 | error = chown_common(&f.file->f_path, user, group); |
593 | mnt_drop_write_file(f.file); | 593 | mnt_drop_write_file(f.file); |
594 | out_fput: | 594 | out_fput: |
@@ -859,6 +859,24 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o | |||
859 | } | 859 | } |
860 | 860 | ||
861 | /** | 861 | /** |
862 | * file_open_name - open file and return file pointer | ||
863 | * | ||
864 | * @name: struct filename containing path to open | ||
865 | * @flags: open flags as per the open(2) second argument | ||
866 | * @mode: mode for the new file if O_CREAT is set, else ignored | ||
867 | * | ||
868 | * This is the helper to open a file from kernelspace if you really | ||
869 | * have to. But in generally you should not do this, so please move | ||
870 | * along, nothing to see here.. | ||
871 | */ | ||
872 | struct file *file_open_name(struct filename *name, int flags, umode_t mode) | ||
873 | { | ||
874 | struct open_flags op; | ||
875 | int lookup = build_open_flags(flags, mode, &op); | ||
876 | return do_filp_open(AT_FDCWD, name, &op, lookup); | ||
877 | } | ||
878 | |||
879 | /** | ||
862 | * filp_open - open file and return file pointer | 880 | * filp_open - open file and return file pointer |
863 | * | 881 | * |
864 | * @filename: path to open | 882 | * @filename: path to open |
@@ -871,9 +889,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o | |||
871 | */ | 889 | */ |
872 | struct file *filp_open(const char *filename, int flags, umode_t mode) | 890 | struct file *filp_open(const char *filename, int flags, umode_t mode) |
873 | { | 891 | { |
874 | struct open_flags op; | 892 | struct filename name = {.name = filename}; |
875 | int lookup = build_open_flags(flags, mode, &op); | 893 | return file_open_name(&name, flags, mode); |
876 | return do_filp_open(AT_FDCWD, filename, &op, lookup); | ||
877 | } | 894 | } |
878 | EXPORT_SYMBOL(filp_open); | 895 | EXPORT_SYMBOL(filp_open); |
879 | 896 | ||
@@ -895,7 +912,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) | |||
895 | { | 912 | { |
896 | struct open_flags op; | 913 | struct open_flags op; |
897 | int lookup = build_open_flags(flags, mode, &op); | 914 | int lookup = build_open_flags(flags, mode, &op); |
898 | char *tmp = getname(filename); | 915 | struct filename *tmp = getname(filename); |
899 | int fd = PTR_ERR(tmp); | 916 | int fd = PTR_ERR(tmp); |
900 | 917 | ||
901 | if (!IS_ERR(tmp)) { | 918 | if (!IS_ERR(tmp)) { |
diff --git a/fs/proc/base.c b/fs/proc/base.c index ef5c84be66f9..144a96732dd7 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -2258,7 +2258,8 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
2258 | pid_t tgid = task_tgid_nr_ns(current, ns); | 2258 | pid_t tgid = task_tgid_nr_ns(current, ns); |
2259 | char *name = ERR_PTR(-ENOENT); | 2259 | char *name = ERR_PTR(-ENOENT); |
2260 | if (tgid) { | 2260 | if (tgid) { |
2261 | name = __getname(); | 2261 | /* 11 for max length of signed int in decimal + NULL term */ |
2262 | name = kmalloc(12, GFP_KERNEL); | ||
2262 | if (!name) | 2263 | if (!name) |
2263 | name = ERR_PTR(-ENOMEM); | 2264 | name = ERR_PTR(-ENOMEM); |
2264 | else | 2265 | else |
@@ -2273,7 +2274,7 @@ static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, | |||
2273 | { | 2274 | { |
2274 | char *s = nd_get_link(nd); | 2275 | char *s = nd_get_link(nd); |
2275 | if (!IS_ERR(s)) | 2276 | if (!IS_ERR(s)) |
2276 | __putname(s); | 2277 | kfree(s); |
2277 | } | 2278 | } |
2278 | 2279 | ||
2279 | static const struct inode_operations proc_self_inode_operations = { | 2280 | static const struct inode_operations proc_self_inode_operations = { |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 79827ce03e3b..14df8806ff29 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -1158,6 +1158,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1158 | struct vm_area_struct *vma = v; | 1158 | struct vm_area_struct *vma = v; |
1159 | struct numa_maps *md = &numa_priv->md; | 1159 | struct numa_maps *md = &numa_priv->md; |
1160 | struct file *file = vma->vm_file; | 1160 | struct file *file = vma->vm_file; |
1161 | struct task_struct *task = proc_priv->task; | ||
1161 | struct mm_struct *mm = vma->vm_mm; | 1162 | struct mm_struct *mm = vma->vm_mm; |
1162 | struct mm_walk walk = {}; | 1163 | struct mm_walk walk = {}; |
1163 | struct mempolicy *pol; | 1164 | struct mempolicy *pol; |
@@ -1177,9 +1178,11 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1177 | walk.private = md; | 1178 | walk.private = md; |
1178 | walk.mm = mm; | 1179 | walk.mm = mm; |
1179 | 1180 | ||
1180 | pol = get_vma_policy(proc_priv->task, vma, vma->vm_start); | 1181 | task_lock(task); |
1182 | pol = get_vma_policy(task, vma, vma->vm_start); | ||
1181 | mpol_to_str(buffer, sizeof(buffer), pol, 0); | 1183 | mpol_to_str(buffer, sizeof(buffer), pol, 0); |
1182 | mpol_cond_put(pol); | 1184 | mpol_cond_put(pol); |
1185 | task_unlock(task); | ||
1183 | 1186 | ||
1184 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); | 1187 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); |
1185 | 1188 | ||
@@ -1189,7 +1192,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1189 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { | 1192 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { |
1190 | seq_printf(m, " heap"); | 1193 | seq_printf(m, " heap"); |
1191 | } else { | 1194 | } else { |
1192 | pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid); | 1195 | pid_t tid = vm_is_stack(task, vma, is_pid); |
1193 | if (tid != 0) { | 1196 | if (tid != 0) { |
1194 | /* | 1197 | /* |
1195 | * Thread stack in /proc/PID/task/TID/maps or | 1198 | * Thread stack in /proc/PID/task/TID/maps or |
diff --git a/fs/quota/quota.c b/fs/quota/quota.c index ff0135d6bc51..af1661f7a54f 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c | |||
@@ -331,11 +331,11 @@ static struct super_block *quotactl_block(const char __user *special, int cmd) | |||
331 | #ifdef CONFIG_BLOCK | 331 | #ifdef CONFIG_BLOCK |
332 | struct block_device *bdev; | 332 | struct block_device *bdev; |
333 | struct super_block *sb; | 333 | struct super_block *sb; |
334 | char *tmp = getname(special); | 334 | struct filename *tmp = getname(special); |
335 | 335 | ||
336 | if (IS_ERR(tmp)) | 336 | if (IS_ERR(tmp)) |
337 | return ERR_CAST(tmp); | 337 | return ERR_CAST(tmp); |
338 | bdev = lookup_bdev(tmp); | 338 | bdev = lookup_bdev(tmp->name); |
339 | putname(tmp); | 339 | putname(tmp); |
340 | if (IS_ERR(bdev)) | 340 | if (IS_ERR(bdev)) |
341 | return ERR_CAST(bdev); | 341 | return ERR_CAST(bdev); |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 46485557cdc6..f27f01a98aa2 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -1573,8 +1573,10 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, | |||
1573 | reiserfs_warning(sb, "reiserfs-13077", | 1573 | reiserfs_warning(sb, "reiserfs-13077", |
1574 | "nfsd/reiserfs, fhtype=%d, len=%d - odd", | 1574 | "nfsd/reiserfs, fhtype=%d, len=%d - odd", |
1575 | fh_type, fh_len); | 1575 | fh_type, fh_len); |
1576 | fh_type = 5; | 1576 | fh_type = fh_len; |
1577 | } | 1577 | } |
1578 | if (fh_len < 2) | ||
1579 | return NULL; | ||
1578 | 1580 | ||
1579 | return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1], | 1581 | return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1], |
1580 | (fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0); | 1582 | (fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0); |
@@ -1583,6 +1585,8 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, | |||
1583 | struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, | 1585 | struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, |
1584 | int fh_len, int fh_type) | 1586 | int fh_len, int fh_type) |
1585 | { | 1587 | { |
1588 | if (fh_type > fh_len) | ||
1589 | fh_type = fh_len; | ||
1586 | if (fh_type < 4) | 1590 | if (fh_type < 4) |
1587 | return NULL; | 1591 | return NULL; |
1588 | 1592 | ||
diff --git a/fs/super.c b/fs/super.c index a3bc935069d9..12f123712161 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -186,15 +186,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
186 | spin_lock_init(&s->s_inode_lru_lock); | 186 | spin_lock_init(&s->s_inode_lru_lock); |
187 | INIT_LIST_HEAD(&s->s_mounts); | 187 | INIT_LIST_HEAD(&s->s_mounts); |
188 | init_rwsem(&s->s_umount); | 188 | init_rwsem(&s->s_umount); |
189 | mutex_init(&s->s_lock); | ||
190 | lockdep_set_class(&s->s_umount, &type->s_umount_key); | 189 | lockdep_set_class(&s->s_umount, &type->s_umount_key); |
191 | /* | 190 | /* |
192 | * The locking rules for s_lock are up to the | ||
193 | * filesystem. For example ext3fs has different | ||
194 | * lock ordering than usbfs: | ||
195 | */ | ||
196 | lockdep_set_class(&s->s_lock, &type->s_lock_key); | ||
197 | /* | ||
198 | * sget() can have s_umount recursion. | 191 | * sget() can have s_umount recursion. |
199 | * | 192 | * |
200 | * When it cannot find a suitable sb, it allocates a new | 193 | * When it cannot find a suitable sb, it allocates a new |
@@ -394,22 +387,6 @@ bool grab_super_passive(struct super_block *sb) | |||
394 | return false; | 387 | return false; |
395 | } | 388 | } |
396 | 389 | ||
397 | /* | ||
398 | * Superblock locking. We really ought to get rid of these two. | ||
399 | */ | ||
400 | void lock_super(struct super_block * sb) | ||
401 | { | ||
402 | mutex_lock(&sb->s_lock); | ||
403 | } | ||
404 | |||
405 | void unlock_super(struct super_block * sb) | ||
406 | { | ||
407 | mutex_unlock(&sb->s_lock); | ||
408 | } | ||
409 | |||
410 | EXPORT_SYMBOL(lock_super); | ||
411 | EXPORT_SYMBOL(unlock_super); | ||
412 | |||
413 | /** | 390 | /** |
414 | * generic_shutdown_super - common helper for ->kill_sb() | 391 | * generic_shutdown_super - common helper for ->kill_sb() |
415 | * @sb: superblock to kill | 392 | * @sb: superblock to kill |
diff --git a/fs/sysv/balloc.c b/fs/sysv/balloc.c index 9a6ad96acf27..921c053fc052 100644 --- a/fs/sysv/balloc.c +++ b/fs/sysv/balloc.c | |||
@@ -60,12 +60,12 @@ void sysv_free_block(struct super_block * sb, sysv_zone_t nr) | |||
60 | return; | 60 | return; |
61 | } | 61 | } |
62 | 62 | ||
63 | lock_super(sb); | 63 | mutex_lock(&sbi->s_lock); |
64 | count = fs16_to_cpu(sbi, *sbi->s_bcache_count); | 64 | count = fs16_to_cpu(sbi, *sbi->s_bcache_count); |
65 | 65 | ||
66 | if (count > sbi->s_flc_size) { | 66 | if (count > sbi->s_flc_size) { |
67 | printk("sysv_free_block: flc_count > flc_size\n"); | 67 | printk("sysv_free_block: flc_count > flc_size\n"); |
68 | unlock_super(sb); | 68 | mutex_unlock(&sbi->s_lock); |
69 | return; | 69 | return; |
70 | } | 70 | } |
71 | /* If the free list head in super-block is full, it is copied | 71 | /* If the free list head in super-block is full, it is copied |
@@ -77,7 +77,7 @@ void sysv_free_block(struct super_block * sb, sysv_zone_t nr) | |||
77 | bh = sb_getblk(sb, block); | 77 | bh = sb_getblk(sb, block); |
78 | if (!bh) { | 78 | if (!bh) { |
79 | printk("sysv_free_block: getblk() failed\n"); | 79 | printk("sysv_free_block: getblk() failed\n"); |
80 | unlock_super(sb); | 80 | mutex_unlock(&sbi->s_lock); |
81 | return; | 81 | return; |
82 | } | 82 | } |
83 | memset(bh->b_data, 0, sb->s_blocksize); | 83 | memset(bh->b_data, 0, sb->s_blocksize); |
@@ -93,7 +93,7 @@ void sysv_free_block(struct super_block * sb, sysv_zone_t nr) | |||
93 | *sbi->s_bcache_count = cpu_to_fs16(sbi, count); | 93 | *sbi->s_bcache_count = cpu_to_fs16(sbi, count); |
94 | fs32_add(sbi, sbi->s_free_blocks, 1); | 94 | fs32_add(sbi, sbi->s_free_blocks, 1); |
95 | dirty_sb(sb); | 95 | dirty_sb(sb); |
96 | unlock_super(sb); | 96 | mutex_unlock(&sbi->s_lock); |
97 | } | 97 | } |
98 | 98 | ||
99 | sysv_zone_t sysv_new_block(struct super_block * sb) | 99 | sysv_zone_t sysv_new_block(struct super_block * sb) |
@@ -104,7 +104,7 @@ sysv_zone_t sysv_new_block(struct super_block * sb) | |||
104 | struct buffer_head * bh; | 104 | struct buffer_head * bh; |
105 | unsigned count; | 105 | unsigned count; |
106 | 106 | ||
107 | lock_super(sb); | 107 | mutex_lock(&sbi->s_lock); |
108 | count = fs16_to_cpu(sbi, *sbi->s_bcache_count); | 108 | count = fs16_to_cpu(sbi, *sbi->s_bcache_count); |
109 | 109 | ||
110 | if (count == 0) /* Applies only to Coherent FS */ | 110 | if (count == 0) /* Applies only to Coherent FS */ |
@@ -147,11 +147,11 @@ sysv_zone_t sysv_new_block(struct super_block * sb) | |||
147 | /* Now the free list head in the superblock is valid again. */ | 147 | /* Now the free list head in the superblock is valid again. */ |
148 | fs32_add(sbi, sbi->s_free_blocks, -1); | 148 | fs32_add(sbi, sbi->s_free_blocks, -1); |
149 | dirty_sb(sb); | 149 | dirty_sb(sb); |
150 | unlock_super(sb); | 150 | mutex_unlock(&sbi->s_lock); |
151 | return nr; | 151 | return nr; |
152 | 152 | ||
153 | Enospc: | 153 | Enospc: |
154 | unlock_super(sb); | 154 | mutex_unlock(&sbi->s_lock); |
155 | return 0; | 155 | return 0; |
156 | } | 156 | } |
157 | 157 | ||
@@ -173,7 +173,7 @@ unsigned long sysv_count_free_blocks(struct super_block * sb) | |||
173 | if (sbi->s_type == FSTYPE_AFS) | 173 | if (sbi->s_type == FSTYPE_AFS) |
174 | return 0; | 174 | return 0; |
175 | 175 | ||
176 | lock_super(sb); | 176 | mutex_lock(&sbi->s_lock); |
177 | sb_count = fs32_to_cpu(sbi, *sbi->s_free_blocks); | 177 | sb_count = fs32_to_cpu(sbi, *sbi->s_free_blocks); |
178 | 178 | ||
179 | if (0) | 179 | if (0) |
@@ -211,7 +211,7 @@ unsigned long sysv_count_free_blocks(struct super_block * sb) | |||
211 | if (count != sb_count) | 211 | if (count != sb_count) |
212 | goto Ecount; | 212 | goto Ecount; |
213 | done: | 213 | done: |
214 | unlock_super(sb); | 214 | mutex_unlock(&sbi->s_lock); |
215 | return count; | 215 | return count; |
216 | 216 | ||
217 | Einval: | 217 | Einval: |
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index 8233b02eccae..f9db4eb31db4 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c | |||
@@ -118,7 +118,7 @@ void sysv_free_inode(struct inode * inode) | |||
118 | "%s\n", inode->i_sb->s_id); | 118 | "%s\n", inode->i_sb->s_id); |
119 | return; | 119 | return; |
120 | } | 120 | } |
121 | lock_super(sb); | 121 | mutex_lock(&sbi->s_lock); |
122 | count = fs16_to_cpu(sbi, *sbi->s_sb_fic_count); | 122 | count = fs16_to_cpu(sbi, *sbi->s_sb_fic_count); |
123 | if (count < sbi->s_fic_size) { | 123 | if (count < sbi->s_fic_size) { |
124 | *sv_sb_fic_inode(sb,count++) = cpu_to_fs16(sbi, ino); | 124 | *sv_sb_fic_inode(sb,count++) = cpu_to_fs16(sbi, ino); |
@@ -128,7 +128,7 @@ void sysv_free_inode(struct inode * inode) | |||
128 | dirty_sb(sb); | 128 | dirty_sb(sb); |
129 | memset(raw_inode, 0, sizeof(struct sysv_inode)); | 129 | memset(raw_inode, 0, sizeof(struct sysv_inode)); |
130 | mark_buffer_dirty(bh); | 130 | mark_buffer_dirty(bh); |
131 | unlock_super(sb); | 131 | mutex_unlock(&sbi->s_lock); |
132 | brelse(bh); | 132 | brelse(bh); |
133 | } | 133 | } |
134 | 134 | ||
@@ -147,13 +147,13 @@ struct inode * sysv_new_inode(const struct inode * dir, umode_t mode) | |||
147 | if (!inode) | 147 | if (!inode) |
148 | return ERR_PTR(-ENOMEM); | 148 | return ERR_PTR(-ENOMEM); |
149 | 149 | ||
150 | lock_super(sb); | 150 | mutex_lock(&sbi->s_lock); |
151 | count = fs16_to_cpu(sbi, *sbi->s_sb_fic_count); | 151 | count = fs16_to_cpu(sbi, *sbi->s_sb_fic_count); |
152 | if (count == 0 || (*sv_sb_fic_inode(sb,count-1) == 0)) { | 152 | if (count == 0 || (*sv_sb_fic_inode(sb,count-1) == 0)) { |
153 | count = refill_free_cache(sb); | 153 | count = refill_free_cache(sb); |
154 | if (count == 0) { | 154 | if (count == 0) { |
155 | iput(inode); | 155 | iput(inode); |
156 | unlock_super(sb); | 156 | mutex_unlock(&sbi->s_lock); |
157 | return ERR_PTR(-ENOSPC); | 157 | return ERR_PTR(-ENOSPC); |
158 | } | 158 | } |
159 | } | 159 | } |
@@ -174,7 +174,7 @@ struct inode * sysv_new_inode(const struct inode * dir, umode_t mode) | |||
174 | sysv_write_inode(inode, &wbc); /* ensure inode not allocated again */ | 174 | sysv_write_inode(inode, &wbc); /* ensure inode not allocated again */ |
175 | mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ | 175 | mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ |
176 | /* That's it. */ | 176 | /* That's it. */ |
177 | unlock_super(sb); | 177 | mutex_unlock(&sbi->s_lock); |
178 | return inode; | 178 | return inode; |
179 | } | 179 | } |
180 | 180 | ||
@@ -185,7 +185,7 @@ unsigned long sysv_count_free_inodes(struct super_block * sb) | |||
185 | struct sysv_inode * raw_inode; | 185 | struct sysv_inode * raw_inode; |
186 | int ino, count, sb_count; | 186 | int ino, count, sb_count; |
187 | 187 | ||
188 | lock_super(sb); | 188 | mutex_lock(&sbi->s_lock); |
189 | 189 | ||
190 | sb_count = fs16_to_cpu(sbi, *sbi->s_sb_total_free_inodes); | 190 | sb_count = fs16_to_cpu(sbi, *sbi->s_sb_total_free_inodes); |
191 | 191 | ||
@@ -213,7 +213,7 @@ unsigned long sysv_count_free_inodes(struct super_block * sb) | |||
213 | if (count != sb_count) | 213 | if (count != sb_count) |
214 | goto Einval; | 214 | goto Einval; |
215 | out: | 215 | out: |
216 | unlock_super(sb); | 216 | mutex_unlock(&sbi->s_lock); |
217 | return count; | 217 | return count; |
218 | 218 | ||
219 | Einval: | 219 | Einval: |
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index d33e506c1eac..c327d4ee1235 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c | |||
@@ -36,7 +36,7 @@ static int sysv_sync_fs(struct super_block *sb, int wait) | |||
36 | struct sysv_sb_info *sbi = SYSV_SB(sb); | 36 | struct sysv_sb_info *sbi = SYSV_SB(sb); |
37 | unsigned long time = get_seconds(), old_time; | 37 | unsigned long time = get_seconds(), old_time; |
38 | 38 | ||
39 | lock_super(sb); | 39 | mutex_lock(&sbi->s_lock); |
40 | 40 | ||
41 | /* | 41 | /* |
42 | * If we are going to write out the super block, | 42 | * If we are going to write out the super block, |
@@ -51,7 +51,7 @@ static int sysv_sync_fs(struct super_block *sb, int wait) | |||
51 | mark_buffer_dirty(sbi->s_bh2); | 51 | mark_buffer_dirty(sbi->s_bh2); |
52 | } | 52 | } |
53 | 53 | ||
54 | unlock_super(sb); | 54 | mutex_unlock(&sbi->s_lock); |
55 | 55 | ||
56 | return 0; | 56 | return 0; |
57 | } | 57 | } |
diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 7491c33b6468..a38e87bdd78d 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c | |||
@@ -368,6 +368,7 @@ static int sysv_fill_super(struct super_block *sb, void *data, int silent) | |||
368 | 368 | ||
369 | sbi->s_sb = sb; | 369 | sbi->s_sb = sb; |
370 | sbi->s_block_base = 0; | 370 | sbi->s_block_base = 0; |
371 | mutex_init(&sbi->s_lock); | ||
371 | sb->s_fs_info = sbi; | 372 | sb->s_fs_info = sbi; |
372 | 373 | ||
373 | sb_set_blocksize(sb, BLOCK_SIZE); | 374 | sb_set_blocksize(sb, BLOCK_SIZE); |
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 0bc35fdc58e2..69d488986cce 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h | |||
@@ -58,6 +58,7 @@ struct sysv_sb_info { | |||
58 | u32 s_nzones; /* same as s_sbd->s_fsize */ | 58 | u32 s_nzones; /* same as s_sbd->s_fsize */ |
59 | u16 s_namelen; /* max length of dir entry */ | 59 | u16 s_namelen; /* max length of dir entry */ |
60 | int s_forced_ro; | 60 | int s_forced_ro; |
61 | struct mutex s_lock; | ||
61 | }; | 62 | }; |
62 | 63 | ||
63 | /* | 64 | /* |
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 1b3e410bf334..a7ea492ae660 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c | |||
@@ -54,7 +54,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) | |||
54 | if (ufs_fragnum(fragment) + count > uspi->s_fpg) | 54 | if (ufs_fragnum(fragment) + count > uspi->s_fpg) |
55 | ufs_error (sb, "ufs_free_fragments", "internal error"); | 55 | ufs_error (sb, "ufs_free_fragments", "internal error"); |
56 | 56 | ||
57 | lock_super(sb); | 57 | mutex_lock(&UFS_SB(sb)->s_lock); |
58 | 58 | ||
59 | cgno = ufs_dtog(uspi, fragment); | 59 | cgno = ufs_dtog(uspi, fragment); |
60 | bit = ufs_dtogd(uspi, fragment); | 60 | bit = ufs_dtogd(uspi, fragment); |
@@ -118,12 +118,12 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) | |||
118 | ubh_sync_block(UCPI_UBH(ucpi)); | 118 | ubh_sync_block(UCPI_UBH(ucpi)); |
119 | ufs_mark_sb_dirty(sb); | 119 | ufs_mark_sb_dirty(sb); |
120 | 120 | ||
121 | unlock_super (sb); | 121 | mutex_unlock(&UFS_SB(sb)->s_lock); |
122 | UFSD("EXIT\n"); | 122 | UFSD("EXIT\n"); |
123 | return; | 123 | return; |
124 | 124 | ||
125 | failed: | 125 | failed: |
126 | unlock_super (sb); | 126 | mutex_unlock(&UFS_SB(sb)->s_lock); |
127 | UFSD("EXIT (FAILED)\n"); | 127 | UFSD("EXIT (FAILED)\n"); |
128 | return; | 128 | return; |
129 | } | 129 | } |
@@ -155,7 +155,7 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count) | |||
155 | goto failed; | 155 | goto failed; |
156 | } | 156 | } |
157 | 157 | ||
158 | lock_super(sb); | 158 | mutex_lock(&UFS_SB(sb)->s_lock); |
159 | 159 | ||
160 | do_more: | 160 | do_more: |
161 | overflow = 0; | 161 | overflow = 0; |
@@ -215,12 +215,12 @@ do_more: | |||
215 | } | 215 | } |
216 | 216 | ||
217 | ufs_mark_sb_dirty(sb); | 217 | ufs_mark_sb_dirty(sb); |
218 | unlock_super (sb); | 218 | mutex_unlock(&UFS_SB(sb)->s_lock); |
219 | UFSD("EXIT\n"); | 219 | UFSD("EXIT\n"); |
220 | return; | 220 | return; |
221 | 221 | ||
222 | failed_unlock: | 222 | failed_unlock: |
223 | unlock_super (sb); | 223 | mutex_unlock(&UFS_SB(sb)->s_lock); |
224 | failed: | 224 | failed: |
225 | UFSD("EXIT (FAILED)\n"); | 225 | UFSD("EXIT (FAILED)\n"); |
226 | return; | 226 | return; |
@@ -361,7 +361,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, | |||
361 | usb1 = ubh_get_usb_first(uspi); | 361 | usb1 = ubh_get_usb_first(uspi); |
362 | *err = -ENOSPC; | 362 | *err = -ENOSPC; |
363 | 363 | ||
364 | lock_super (sb); | 364 | mutex_lock(&UFS_SB(sb)->s_lock); |
365 | tmp = ufs_data_ptr_to_cpu(sb, p); | 365 | tmp = ufs_data_ptr_to_cpu(sb, p); |
366 | 366 | ||
367 | if (count + ufs_fragnum(fragment) > uspi->s_fpb) { | 367 | if (count + ufs_fragnum(fragment) > uspi->s_fpb) { |
@@ -382,19 +382,19 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, | |||
382 | "fragment %llu, tmp %llu\n", | 382 | "fragment %llu, tmp %llu\n", |
383 | (unsigned long long)fragment, | 383 | (unsigned long long)fragment, |
384 | (unsigned long long)tmp); | 384 | (unsigned long long)tmp); |
385 | unlock_super(sb); | 385 | mutex_unlock(&UFS_SB(sb)->s_lock); |
386 | return INVBLOCK; | 386 | return INVBLOCK; |
387 | } | 387 | } |
388 | if (fragment < UFS_I(inode)->i_lastfrag) { | 388 | if (fragment < UFS_I(inode)->i_lastfrag) { |
389 | UFSD("EXIT (ALREADY ALLOCATED)\n"); | 389 | UFSD("EXIT (ALREADY ALLOCATED)\n"); |
390 | unlock_super (sb); | 390 | mutex_unlock(&UFS_SB(sb)->s_lock); |
391 | return 0; | 391 | return 0; |
392 | } | 392 | } |
393 | } | 393 | } |
394 | else { | 394 | else { |
395 | if (tmp) { | 395 | if (tmp) { |
396 | UFSD("EXIT (ALREADY ALLOCATED)\n"); | 396 | UFSD("EXIT (ALREADY ALLOCATED)\n"); |
397 | unlock_super(sb); | 397 | mutex_unlock(&UFS_SB(sb)->s_lock); |
398 | return 0; | 398 | return 0; |
399 | } | 399 | } |
400 | } | 400 | } |
@@ -403,7 +403,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, | |||
403 | * There is not enough space for user on the device | 403 | * There is not enough space for user on the device |
404 | */ | 404 | */ |
405 | if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) { | 405 | if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) { |
406 | unlock_super (sb); | 406 | mutex_unlock(&UFS_SB(sb)->s_lock); |
407 | UFSD("EXIT (FAILED)\n"); | 407 | UFSD("EXIT (FAILED)\n"); |
408 | return 0; | 408 | return 0; |
409 | } | 409 | } |
@@ -428,7 +428,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, | |||
428 | ufs_clear_frags(inode, result + oldcount, | 428 | ufs_clear_frags(inode, result + oldcount, |
429 | newcount - oldcount, locked_page != NULL); | 429 | newcount - oldcount, locked_page != NULL); |
430 | } | 430 | } |
431 | unlock_super(sb); | 431 | mutex_unlock(&UFS_SB(sb)->s_lock); |
432 | UFSD("EXIT, result %llu\n", (unsigned long long)result); | 432 | UFSD("EXIT, result %llu\n", (unsigned long long)result); |
433 | return result; | 433 | return result; |
434 | } | 434 | } |
@@ -443,7 +443,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, | |||
443 | fragment + count); | 443 | fragment + count); |
444 | ufs_clear_frags(inode, result + oldcount, newcount - oldcount, | 444 | ufs_clear_frags(inode, result + oldcount, newcount - oldcount, |
445 | locked_page != NULL); | 445 | locked_page != NULL); |
446 | unlock_super(sb); | 446 | mutex_unlock(&UFS_SB(sb)->s_lock); |
447 | UFSD("EXIT, result %llu\n", (unsigned long long)result); | 447 | UFSD("EXIT, result %llu\n", (unsigned long long)result); |
448 | return result; | 448 | return result; |
449 | } | 449 | } |
@@ -481,7 +481,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, | |||
481 | *err = 0; | 481 | *err = 0; |
482 | UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, | 482 | UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, |
483 | fragment + count); | 483 | fragment + count); |
484 | unlock_super(sb); | 484 | mutex_unlock(&UFS_SB(sb)->s_lock); |
485 | if (newcount < request) | 485 | if (newcount < request) |
486 | ufs_free_fragments (inode, result + newcount, request - newcount); | 486 | ufs_free_fragments (inode, result + newcount, request - newcount); |
487 | ufs_free_fragments (inode, tmp, oldcount); | 487 | ufs_free_fragments (inode, tmp, oldcount); |
@@ -489,7 +489,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, | |||
489 | return result; | 489 | return result; |
490 | } | 490 | } |
491 | 491 | ||
492 | unlock_super(sb); | 492 | mutex_unlock(&UFS_SB(sb)->s_lock); |
493 | UFSD("EXIT (FAILED)\n"); | 493 | UFSD("EXIT (FAILED)\n"); |
494 | return 0; | 494 | return 0; |
495 | } | 495 | } |
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index e84cbe21b986..d0426d74817b 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c | |||
@@ -71,11 +71,11 @@ void ufs_free_inode (struct inode * inode) | |||
71 | 71 | ||
72 | ino = inode->i_ino; | 72 | ino = inode->i_ino; |
73 | 73 | ||
74 | lock_super (sb); | 74 | mutex_lock(&UFS_SB(sb)->s_lock); |
75 | 75 | ||
76 | if (!((ino > 1) && (ino < (uspi->s_ncg * uspi->s_ipg )))) { | 76 | if (!((ino > 1) && (ino < (uspi->s_ncg * uspi->s_ipg )))) { |
77 | ufs_warning(sb, "ufs_free_inode", "reserved inode or nonexistent inode %u\n", ino); | 77 | ufs_warning(sb, "ufs_free_inode", "reserved inode or nonexistent inode %u\n", ino); |
78 | unlock_super (sb); | 78 | mutex_unlock(&UFS_SB(sb)->s_lock); |
79 | return; | 79 | return; |
80 | } | 80 | } |
81 | 81 | ||
@@ -83,7 +83,7 @@ void ufs_free_inode (struct inode * inode) | |||
83 | bit = ufs_inotocgoff (ino); | 83 | bit = ufs_inotocgoff (ino); |
84 | ucpi = ufs_load_cylinder (sb, cg); | 84 | ucpi = ufs_load_cylinder (sb, cg); |
85 | if (!ucpi) { | 85 | if (!ucpi) { |
86 | unlock_super (sb); | 86 | mutex_unlock(&UFS_SB(sb)->s_lock); |
87 | return; | 87 | return; |
88 | } | 88 | } |
89 | ucg = ubh_get_ucg(UCPI_UBH(ucpi)); | 89 | ucg = ubh_get_ucg(UCPI_UBH(ucpi)); |
@@ -117,7 +117,7 @@ void ufs_free_inode (struct inode * inode) | |||
117 | ubh_sync_block(UCPI_UBH(ucpi)); | 117 | ubh_sync_block(UCPI_UBH(ucpi)); |
118 | 118 | ||
119 | ufs_mark_sb_dirty(sb); | 119 | ufs_mark_sb_dirty(sb); |
120 | unlock_super (sb); | 120 | mutex_unlock(&UFS_SB(sb)->s_lock); |
121 | UFSD("EXIT\n"); | 121 | UFSD("EXIT\n"); |
122 | } | 122 | } |
123 | 123 | ||
@@ -197,7 +197,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) | |||
197 | uspi = sbi->s_uspi; | 197 | uspi = sbi->s_uspi; |
198 | usb1 = ubh_get_usb_first(uspi); | 198 | usb1 = ubh_get_usb_first(uspi); |
199 | 199 | ||
200 | lock_super (sb); | 200 | mutex_lock(&sbi->s_lock); |
201 | 201 | ||
202 | /* | 202 | /* |
203 | * Try to place the inode in its parent directory | 203 | * Try to place the inode in its parent directory |
@@ -333,20 +333,20 @@ cg_found: | |||
333 | brelse(bh); | 333 | brelse(bh); |
334 | } | 334 | } |
335 | 335 | ||
336 | unlock_super (sb); | 336 | mutex_unlock(&sbi->s_lock); |
337 | 337 | ||
338 | UFSD("allocating inode %lu\n", inode->i_ino); | 338 | UFSD("allocating inode %lu\n", inode->i_ino); |
339 | UFSD("EXIT\n"); | 339 | UFSD("EXIT\n"); |
340 | return inode; | 340 | return inode; |
341 | 341 | ||
342 | fail_remove_inode: | 342 | fail_remove_inode: |
343 | unlock_super(sb); | 343 | mutex_unlock(&sbi->s_lock); |
344 | clear_nlink(inode); | 344 | clear_nlink(inode); |
345 | iput(inode); | 345 | iput(inode); |
346 | UFSD("EXIT (FAILED): err %d\n", err); | 346 | UFSD("EXIT (FAILED): err %d\n", err); |
347 | return ERR_PTR(err); | 347 | return ERR_PTR(err); |
348 | failed: | 348 | failed: |
349 | unlock_super (sb); | 349 | mutex_unlock(&sbi->s_lock); |
350 | make_bad_inode(inode); | 350 | make_bad_inode(inode); |
351 | iput (inode); | 351 | iput (inode); |
352 | UFSD("EXIT (FAILED): err %d\n", err); | 352 | UFSD("EXIT (FAILED): err %d\n", err); |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index f7cfecfe1cab..dc8e3a861d0f 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -699,7 +699,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait) | |||
699 | unsigned flags; | 699 | unsigned flags; |
700 | 700 | ||
701 | lock_ufs(sb); | 701 | lock_ufs(sb); |
702 | lock_super(sb); | 702 | mutex_lock(&UFS_SB(sb)->s_lock); |
703 | 703 | ||
704 | UFSD("ENTER\n"); | 704 | UFSD("ENTER\n"); |
705 | 705 | ||
@@ -717,7 +717,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait) | |||
717 | ufs_put_cstotal(sb); | 717 | ufs_put_cstotal(sb); |
718 | 718 | ||
719 | UFSD("EXIT\n"); | 719 | UFSD("EXIT\n"); |
720 | unlock_super(sb); | 720 | mutex_unlock(&UFS_SB(sb)->s_lock); |
721 | unlock_ufs(sb); | 721 | unlock_ufs(sb); |
722 | 722 | ||
723 | return 0; | 723 | return 0; |
@@ -805,6 +805,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) | |||
805 | } | 805 | } |
806 | #endif | 806 | #endif |
807 | mutex_init(&sbi->mutex); | 807 | mutex_init(&sbi->mutex); |
808 | mutex_init(&sbi->s_lock); | ||
808 | spin_lock_init(&sbi->work_lock); | 809 | spin_lock_init(&sbi->work_lock); |
809 | INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); | 810 | INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); |
810 | /* | 811 | /* |
@@ -1280,7 +1281,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) | |||
1280 | unsigned flags; | 1281 | unsigned flags; |
1281 | 1282 | ||
1282 | lock_ufs(sb); | 1283 | lock_ufs(sb); |
1283 | lock_super(sb); | 1284 | mutex_lock(&UFS_SB(sb)->s_lock); |
1284 | uspi = UFS_SB(sb)->s_uspi; | 1285 | uspi = UFS_SB(sb)->s_uspi; |
1285 | flags = UFS_SB(sb)->s_flags; | 1286 | flags = UFS_SB(sb)->s_flags; |
1286 | usb1 = ubh_get_usb_first(uspi); | 1287 | usb1 = ubh_get_usb_first(uspi); |
@@ -1294,7 +1295,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) | |||
1294 | new_mount_opt = 0; | 1295 | new_mount_opt = 0; |
1295 | ufs_set_opt (new_mount_opt, ONERROR_LOCK); | 1296 | ufs_set_opt (new_mount_opt, ONERROR_LOCK); |
1296 | if (!ufs_parse_options (data, &new_mount_opt)) { | 1297 | if (!ufs_parse_options (data, &new_mount_opt)) { |
1297 | unlock_super(sb); | 1298 | mutex_unlock(&UFS_SB(sb)->s_lock); |
1298 | unlock_ufs(sb); | 1299 | unlock_ufs(sb); |
1299 | return -EINVAL; | 1300 | return -EINVAL; |
1300 | } | 1301 | } |
@@ -1302,14 +1303,14 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) | |||
1302 | new_mount_opt |= ufstype; | 1303 | new_mount_opt |= ufstype; |
1303 | } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { | 1304 | } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { |
1304 | printk("ufstype can't be changed during remount\n"); | 1305 | printk("ufstype can't be changed during remount\n"); |
1305 | unlock_super(sb); | 1306 | mutex_unlock(&UFS_SB(sb)->s_lock); |
1306 | unlock_ufs(sb); | 1307 | unlock_ufs(sb); |
1307 | return -EINVAL; | 1308 | return -EINVAL; |
1308 | } | 1309 | } |
1309 | 1310 | ||
1310 | if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { | 1311 | if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { |
1311 | UFS_SB(sb)->s_mount_opt = new_mount_opt; | 1312 | UFS_SB(sb)->s_mount_opt = new_mount_opt; |
1312 | unlock_super(sb); | 1313 | mutex_unlock(&UFS_SB(sb)->s_lock); |
1313 | unlock_ufs(sb); | 1314 | unlock_ufs(sb); |
1314 | return 0; | 1315 | return 0; |
1315 | } | 1316 | } |
@@ -1334,7 +1335,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) | |||
1334 | #ifndef CONFIG_UFS_FS_WRITE | 1335 | #ifndef CONFIG_UFS_FS_WRITE |
1335 | printk("ufs was compiled with read-only support, " | 1336 | printk("ufs was compiled with read-only support, " |
1336 | "can't be mounted as read-write\n"); | 1337 | "can't be mounted as read-write\n"); |
1337 | unlock_super(sb); | 1338 | mutex_unlock(&UFS_SB(sb)->s_lock); |
1338 | unlock_ufs(sb); | 1339 | unlock_ufs(sb); |
1339 | return -EINVAL; | 1340 | return -EINVAL; |
1340 | #else | 1341 | #else |
@@ -1344,13 +1345,13 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) | |||
1344 | ufstype != UFS_MOUNT_UFSTYPE_SUNx86 && | 1345 | ufstype != UFS_MOUNT_UFSTYPE_SUNx86 && |
1345 | ufstype != UFS_MOUNT_UFSTYPE_UFS2) { | 1346 | ufstype != UFS_MOUNT_UFSTYPE_UFS2) { |
1346 | printk("this ufstype is read-only supported\n"); | 1347 | printk("this ufstype is read-only supported\n"); |
1347 | unlock_super(sb); | 1348 | mutex_unlock(&UFS_SB(sb)->s_lock); |
1348 | unlock_ufs(sb); | 1349 | unlock_ufs(sb); |
1349 | return -EINVAL; | 1350 | return -EINVAL; |
1350 | } | 1351 | } |
1351 | if (!ufs_read_cylinder_structures(sb)) { | 1352 | if (!ufs_read_cylinder_structures(sb)) { |
1352 | printk("failed during remounting\n"); | 1353 | printk("failed during remounting\n"); |
1353 | unlock_super(sb); | 1354 | mutex_unlock(&UFS_SB(sb)->s_lock); |
1354 | unlock_ufs(sb); | 1355 | unlock_ufs(sb); |
1355 | return -EPERM; | 1356 | return -EPERM; |
1356 | } | 1357 | } |
@@ -1358,7 +1359,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) | |||
1358 | #endif | 1359 | #endif |
1359 | } | 1360 | } |
1360 | UFS_SB(sb)->s_mount_opt = new_mount_opt; | 1361 | UFS_SB(sb)->s_mount_opt = new_mount_opt; |
1361 | unlock_super(sb); | 1362 | mutex_unlock(&UFS_SB(sb)->s_lock); |
1362 | unlock_ufs(sb); | 1363 | unlock_ufs(sb); |
1363 | return 0; | 1364 | return 0; |
1364 | } | 1365 | } |
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 343e6fc571e5..ff2c15ab81aa 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h | |||
@@ -24,6 +24,7 @@ struct ufs_sb_info { | |||
24 | int work_queued; /* non-zero if the delayed work is queued */ | 24 | int work_queued; /* non-zero if the delayed work is queued */ |
25 | struct delayed_work sync_work; /* FS sync delayed work */ | 25 | struct delayed_work sync_work; /* FS sync delayed work */ |
26 | spinlock_t work_lock; /* protects sync_work and work_queued */ | 26 | spinlock_t work_lock; /* protects sync_work and work_queued */ |
27 | struct mutex s_lock; | ||
27 | }; | 28 | }; |
28 | 29 | ||
29 | struct ufs_inode_info { | 30 | struct ufs_inode_info { |
diff --git a/fs/xattr.c b/fs/xattr.c index 1780f062dbaf..e164dddb8e96 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -412,7 +412,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, | |||
412 | if (!f.file) | 412 | if (!f.file) |
413 | return error; | 413 | return error; |
414 | dentry = f.file->f_path.dentry; | 414 | dentry = f.file->f_path.dentry; |
415 | audit_inode(NULL, dentry); | 415 | audit_inode(NULL, dentry, 0); |
416 | error = mnt_want_write_file(f.file); | 416 | error = mnt_want_write_file(f.file); |
417 | if (!error) { | 417 | if (!error) { |
418 | error = setxattr(dentry, name, value, size, flags); | 418 | error = setxattr(dentry, name, value, size, flags); |
@@ -507,7 +507,7 @@ SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, | |||
507 | 507 | ||
508 | if (!f.file) | 508 | if (!f.file) |
509 | return error; | 509 | return error; |
510 | audit_inode(NULL, f.file->f_path.dentry); | 510 | audit_inode(NULL, f.file->f_path.dentry, 0); |
511 | error = getxattr(f.file->f_path.dentry, name, value, size); | 511 | error = getxattr(f.file->f_path.dentry, name, value, size); |
512 | fdput(f); | 512 | fdput(f); |
513 | return error; | 513 | return error; |
@@ -586,7 +586,7 @@ SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) | |||
586 | 586 | ||
587 | if (!f.file) | 587 | if (!f.file) |
588 | return error; | 588 | return error; |
589 | audit_inode(NULL, f.file->f_path.dentry); | 589 | audit_inode(NULL, f.file->f_path.dentry, 0); |
590 | error = listxattr(f.file->f_path.dentry, list, size); | 590 | error = listxattr(f.file->f_path.dentry, list, size); |
591 | fdput(f); | 591 | fdput(f); |
592 | return error; | 592 | return error; |
@@ -655,7 +655,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) | |||
655 | if (!f.file) | 655 | if (!f.file) |
656 | return error; | 656 | return error; |
657 | dentry = f.file->f_path.dentry; | 657 | dentry = f.file->f_path.dentry; |
658 | audit_inode(NULL, dentry); | 658 | audit_inode(NULL, dentry, 0); |
659 | error = mnt_want_write_file(f.file); | 659 | error = mnt_want_write_file(f.file); |
660 | if (!error) { | 660 | if (!error) { |
661 | error = removexattr(dentry, name); | 661 | error = removexattr(dentry, name); |
diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c index 11efd830b5f5..9fbea87fdb6e 100644 --- a/fs/xattr_acl.c +++ b/fs/xattr_acl.c | |||
@@ -45,7 +45,7 @@ static void posix_acl_fix_xattr_userns( | |||
45 | break; | 45 | break; |
46 | case ACL_GROUP: | 46 | case ACL_GROUP: |
47 | gid = make_kgid(from, le32_to_cpu(entry->e_id)); | 47 | gid = make_kgid(from, le32_to_cpu(entry->e_id)); |
48 | entry->e_id = cpu_to_le32(from_kuid(to, uid)); | 48 | entry->e_id = cpu_to_le32(from_kgid(to, gid)); |
49 | break; | 49 | break; |
50 | default: | 50 | default: |
51 | break; | 51 | break; |
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 42679223a0fd..8c6d1d70278c 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c | |||
@@ -189,6 +189,9 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
189 | struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; | 189 | struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; |
190 | struct inode *inode = NULL; | 190 | struct inode *inode = NULL; |
191 | 191 | ||
192 | if (fh_len < xfs_fileid_length(fileid_type)) | ||
193 | return NULL; | ||
194 | |||
192 | switch (fileid_type) { | 195 | switch (fileid_type) { |
193 | case FILEID_INO32_GEN_PARENT: | 196 | case FILEID_INO32_GEN_PARENT: |
194 | inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino, | 197 | inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino, |