aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c7
-rw-r--r--fs/9p/vfs_inode_dotl.c8
-rw-r--r--fs/afs/dir.c3
-rw-r--r--fs/aio.c52
-rw-r--r--fs/autofs4/waitq.c13
-rw-r--r--fs/binfmt_elf.c30
-rw-r--r--fs/bio-integrity.c2
-rw-r--r--fs/bio.c4
-rw-r--r--fs/btrfs/async-thread.c25
-rw-r--r--fs/btrfs/async-thread.h2
-rw-r--r--fs/btrfs/btrfs_inode.h5
-rw-r--r--fs/btrfs/ctree.c7
-rw-r--r--fs/btrfs/ctree.h17
-rw-r--r--fs/btrfs/dev-replace.c9
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent-tree.c57
-rw-r--r--fs/btrfs/extent_io.c18
-rw-r--r--fs/btrfs/file.c4
-rw-r--r--fs/btrfs/free-space-cache.c67
-rw-r--r--fs/btrfs/free-space-cache.h5
-rw-r--r--fs/btrfs/inode.c16
-rw-r--r--fs/btrfs/ioctl.c80
-rw-r--r--fs/btrfs/ordered-data.c24
-rw-r--r--fs/btrfs/ordered-data.h5
-rw-r--r--fs/btrfs/relocation.c43
-rw-r--r--fs/btrfs/scrub.c112
-rw-r--r--fs/btrfs/super.c21
-rw-r--r--fs/btrfs/transaction.c9
-rw-r--r--fs/btrfs/tree-log.c52
-rw-r--r--fs/btrfs/volumes.c14
-rw-r--r--fs/cachefiles/namei.c2
-rw-r--r--fs/cachefiles/xattr.c29
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h5
-rw-r--r--fs/cifs/cifspdu.h21
-rw-r--r--fs/cifs/cifssmb.c1
-rw-r--r--fs/cifs/dir.c1
-rw-r--r--fs/cifs/file.c8
-rw-r--r--fs/cifs/fscache.c7
-rw-r--r--fs/cifs/fscache.h13
-rw-r--r--fs/cifs/inode.c45
-rw-r--r--fs/cifs/readdir.c3
-rw-r--r--fs/cifs/sess.c84
-rw-r--r--fs/fscache/cookie.c3
-rw-r--r--fs/fuse/dir.c20
-rw-r--r--fs/fuse/file.c23
-rw-r--r--fs/fuse/fuse_i.h2
-rw-r--r--fs/gfs2/inode.c4
-rw-r--r--fs/namei.c34
-rw-r--r--fs/nfs/dir.c5
-rw-r--r--fs/nfs/nfs4file.c3
-rw-r--r--fs/nfs/nfs4filelayoutdev.c20
-rw-r--r--fs/nfs/nfs4proc.c58
-rw-r--r--fs/nilfs2/page.c2
-rw-r--r--fs/nilfs2/segment.c11
-rw-r--r--fs/ocfs2/dcache.c7
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/open.c21
-rw-r--r--fs/pstore/platform.c29
-rw-r--r--fs/reiserfs/journal.c67
-rw-r--r--fs/super.c4
-rw-r--r--fs/sysv/super.c1
-rw-r--r--fs/udf/ialloc.c16
-rw-r--r--fs/udf/super.c64
-rw-r--r--fs/udf/udf_sb.h2
-rw-r--r--fs/xfs/xfs_buf_item.c1
-rw-r--r--fs/xfs/xfs_da_btree.c5
-rw-r--r--fs/xfs/xfs_dir2_block.c6
-rw-r--r--fs/xfs/xfs_dir2_format.h51
-rw-r--r--fs/xfs/xfs_dir2_readdir.c4
-rw-r--r--fs/xfs/xfs_dir2_sf.c6
-rw-r--r--fs/xfs/xfs_dquot.c19
-rw-r--r--fs/xfs/xfs_fs.h2
-rw-r--r--fs/xfs/xfs_icache.c9
-rw-r--r--fs/xfs/xfs_log_recover.c74
75 files changed, 907 insertions, 602 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 58e6cbce4156..08f2e1e9a7e6 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -603,10 +603,11 @@ static int v9fs_cache_register(void)
603 if (ret < 0) 603 if (ret < 0)
604 return ret; 604 return ret;
605#ifdef CONFIG_9P_FSCACHE 605#ifdef CONFIG_9P_FSCACHE
606 return fscache_register_netfs(&v9fs_cache_netfs); 606 ret = fscache_register_netfs(&v9fs_cache_netfs);
607#else 607 if (ret < 0)
608 return ret; 608 v9fs_destroy_inode_cache();
609#endif 609#endif
610 return ret;
610} 611}
611 612
612static void v9fs_cache_unregister(void) 613static void v9fs_cache_unregister(void)
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 53687bbf2296..a7c481402c46 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -267,14 +267,8 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
267 } 267 }
268 268
269 /* Only creates */ 269 /* Only creates */
270 if (!(flags & O_CREAT)) 270 if (!(flags & O_CREAT) || dentry->d_inode)
271 return finish_no_open(file, res); 271 return finish_no_open(file, res);
272 else if (dentry->d_inode) {
273 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
274 return -EEXIST;
275 else
276 return finish_no_open(file, res);
277 }
278 272
279 v9ses = v9fs_inode2v9ses(dir); 273 v9ses = v9fs_inode2v9ses(dir);
280 274
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 646337dc5201..529300327f45 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -600,9 +600,6 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
600 600
601 /* lock down the parent dentry so we can peer at it */ 601 /* lock down the parent dentry so we can peer at it */
602 parent = dget_parent(dentry); 602 parent = dget_parent(dentry);
603 if (!parent->d_inode)
604 goto out_bad;
605
606 dir = AFS_FS_I(parent->d_inode); 603 dir = AFS_FS_I(parent->d_inode);
607 604
608 /* validate the parent directory */ 605 /* validate the parent directory */
diff --git a/fs/aio.c b/fs/aio.c
index 6b868f0e0c4c..067e3d340c35 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -167,10 +167,25 @@ static int __init aio_setup(void)
167} 167}
168__initcall(aio_setup); 168__initcall(aio_setup);
169 169
170static void put_aio_ring_file(struct kioctx *ctx)
171{
172 struct file *aio_ring_file = ctx->aio_ring_file;
173 if (aio_ring_file) {
174 truncate_setsize(aio_ring_file->f_inode, 0);
175
176 /* Prevent further access to the kioctx from migratepages */
177 spin_lock(&aio_ring_file->f_inode->i_mapping->private_lock);
178 aio_ring_file->f_inode->i_mapping->private_data = NULL;
179 ctx->aio_ring_file = NULL;
180 spin_unlock(&aio_ring_file->f_inode->i_mapping->private_lock);
181
182 fput(aio_ring_file);
183 }
184}
185
170static void aio_free_ring(struct kioctx *ctx) 186static void aio_free_ring(struct kioctx *ctx)
171{ 187{
172 int i; 188 int i;
173 struct file *aio_ring_file = ctx->aio_ring_file;
174 189
175 for (i = 0; i < ctx->nr_pages; i++) { 190 for (i = 0; i < ctx->nr_pages; i++) {
176 pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, 191 pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
@@ -178,14 +193,10 @@ static void aio_free_ring(struct kioctx *ctx)
178 put_page(ctx->ring_pages[i]); 193 put_page(ctx->ring_pages[i]);
179 } 194 }
180 195
196 put_aio_ring_file(ctx);
197
181 if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) 198 if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
182 kfree(ctx->ring_pages); 199 kfree(ctx->ring_pages);
183
184 if (aio_ring_file) {
185 truncate_setsize(aio_ring_file->f_inode, 0);
186 fput(aio_ring_file);
187 ctx->aio_ring_file = NULL;
188 }
189} 200}
190 201
191static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) 202static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
@@ -207,9 +218,8 @@ static int aio_set_page_dirty(struct page *page)
207static int aio_migratepage(struct address_space *mapping, struct page *new, 218static int aio_migratepage(struct address_space *mapping, struct page *new,
208 struct page *old, enum migrate_mode mode) 219 struct page *old, enum migrate_mode mode)
209{ 220{
210 struct kioctx *ctx = mapping->private_data; 221 struct kioctx *ctx;
211 unsigned long flags; 222 unsigned long flags;
212 unsigned idx = old->index;
213 int rc; 223 int rc;
214 224
215 /* Writeback must be complete */ 225 /* Writeback must be complete */
@@ -224,10 +234,23 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
224 234
225 get_page(new); 235 get_page(new);
226 236
227 spin_lock_irqsave(&ctx->completion_lock, flags); 237 /* We can potentially race against kioctx teardown here. Use the
228 migrate_page_copy(new, old); 238 * address_space's private data lock to protect the mapping's
229 ctx->ring_pages[idx] = new; 239 * private_data.
230 spin_unlock_irqrestore(&ctx->completion_lock, flags); 240 */
241 spin_lock(&mapping->private_lock);
242 ctx = mapping->private_data;
243 if (ctx) {
244 pgoff_t idx;
245 spin_lock_irqsave(&ctx->completion_lock, flags);
246 migrate_page_copy(new, old);
247 idx = old->index;
248 if (idx < (pgoff_t)ctx->nr_pages)
249 ctx->ring_pages[idx] = new;
250 spin_unlock_irqrestore(&ctx->completion_lock, flags);
251 } else
252 rc = -EBUSY;
253 spin_unlock(&mapping->private_lock);
231 254
232 return rc; 255 return rc;
233} 256}
@@ -617,8 +640,7 @@ out_freepcpu:
617out_freeref: 640out_freeref:
618 free_percpu(ctx->users.pcpu_count); 641 free_percpu(ctx->users.pcpu_count);
619out_freectx: 642out_freectx:
620 if (ctx->aio_ring_file) 643 put_aio_ring_file(ctx);
621 fput(ctx->aio_ring_file);
622 kmem_cache_free(kioctx_cachep, ctx); 644 kmem_cache_free(kioctx_cachep, ctx);
623 pr_debug("error allocating ioctx %d\n", err); 645 pr_debug("error allocating ioctx %d\n", err);
624 return ERR_PTR(err); 646 return ERR_PTR(err);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 3db70dae40d3..689e40d983ad 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -109,13 +109,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
109 109
110 pkt.hdr.proto_version = sbi->version; 110 pkt.hdr.proto_version = sbi->version;
111 pkt.hdr.type = type; 111 pkt.hdr.type = type;
112 mutex_lock(&sbi->wq_mutex);
113 112
114 /* Check if we have become catatonic */
115 if (sbi->catatonic) {
116 mutex_unlock(&sbi->wq_mutex);
117 return;
118 }
119 switch (type) { 113 switch (type) {
120 /* Kernel protocol v4 missing and expire packets */ 114 /* Kernel protocol v4 missing and expire packets */
121 case autofs_ptype_missing: 115 case autofs_ptype_missing:
@@ -427,7 +421,6 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
427 wq->tgid = current->tgid; 421 wq->tgid = current->tgid;
428 wq->status = -EINTR; /* Status return if interrupted */ 422 wq->status = -EINTR; /* Status return if interrupted */
429 wq->wait_ctr = 2; 423 wq->wait_ctr = 2;
430 mutex_unlock(&sbi->wq_mutex);
431 424
432 if (sbi->version < 5) { 425 if (sbi->version < 5) {
433 if (notify == NFY_MOUNT) 426 if (notify == NFY_MOUNT)
@@ -449,15 +442,15 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
449 (unsigned long) wq->wait_queue_token, wq->name.len, 442 (unsigned long) wq->wait_queue_token, wq->name.len,
450 wq->name.name, notify); 443 wq->name.name, notify);
451 444
452 /* autofs4_notify_daemon() may block */ 445 /* autofs4_notify_daemon() may block; it will unlock ->wq_mutex */
453 autofs4_notify_daemon(sbi, wq, type); 446 autofs4_notify_daemon(sbi, wq, type);
454 } else { 447 } else {
455 wq->wait_ctr++; 448 wq->wait_ctr++;
456 mutex_unlock(&sbi->wq_mutex);
457 kfree(qstr.name);
458 DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", 449 DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
459 (unsigned long) wq->wait_queue_token, wq->name.len, 450 (unsigned long) wq->wait_queue_token, wq->name.len,
460 wq->name.name, notify); 451 wq->name.name, notify);
452 mutex_unlock(&sbi->wq_mutex);
453 kfree(qstr.name);
461 } 454 }
462 455
463 /* 456 /*
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 100edcc5e312..4c94a79991bb 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1413,7 +1413,7 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1413 * long file_ofs 1413 * long file_ofs
1414 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... 1414 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1415 */ 1415 */
1416static void fill_files_note(struct memelfnote *note) 1416static int fill_files_note(struct memelfnote *note)
1417{ 1417{
1418 struct vm_area_struct *vma; 1418 struct vm_area_struct *vma;
1419 unsigned count, size, names_ofs, remaining, n; 1419 unsigned count, size, names_ofs, remaining, n;
@@ -1428,11 +1428,11 @@ static void fill_files_note(struct memelfnote *note)
1428 names_ofs = (2 + 3 * count) * sizeof(data[0]); 1428 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1429 alloc: 1429 alloc:
1430 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */ 1430 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1431 goto err; 1431 return -EINVAL;
1432 size = round_up(size, PAGE_SIZE); 1432 size = round_up(size, PAGE_SIZE);
1433 data = vmalloc(size); 1433 data = vmalloc(size);
1434 if (!data) 1434 if (!data)
1435 goto err; 1435 return -ENOMEM;
1436 1436
1437 start_end_ofs = data + 2; 1437 start_end_ofs = data + 2;
1438 name_base = name_curpos = ((char *)data) + names_ofs; 1438 name_base = name_curpos = ((char *)data) + names_ofs;
@@ -1485,7 +1485,7 @@ static void fill_files_note(struct memelfnote *note)
1485 1485
1486 size = name_curpos - (char *)data; 1486 size = name_curpos - (char *)data;
1487 fill_note(note, "CORE", NT_FILE, size, data); 1487 fill_note(note, "CORE", NT_FILE, size, data);
1488 err: ; 1488 return 0;
1489} 1489}
1490 1490
1491#ifdef CORE_DUMP_USE_REGSET 1491#ifdef CORE_DUMP_USE_REGSET
@@ -1686,8 +1686,8 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1686 fill_auxv_note(&info->auxv, current->mm); 1686 fill_auxv_note(&info->auxv, current->mm);
1687 info->size += notesize(&info->auxv); 1687 info->size += notesize(&info->auxv);
1688 1688
1689 fill_files_note(&info->files); 1689 if (fill_files_note(&info->files) == 0)
1690 info->size += notesize(&info->files); 1690 info->size += notesize(&info->files);
1691 1691
1692 return 1; 1692 return 1;
1693} 1693}
@@ -1719,7 +1719,8 @@ static int write_note_info(struct elf_note_info *info,
1719 return 0; 1719 return 0;
1720 if (first && !writenote(&info->auxv, file, foffset)) 1720 if (first && !writenote(&info->auxv, file, foffset))
1721 return 0; 1721 return 0;
1722 if (first && !writenote(&info->files, file, foffset)) 1722 if (first && info->files.data &&
1723 !writenote(&info->files, file, foffset))
1723 return 0; 1724 return 0;
1724 1725
1725 for (i = 1; i < info->thread_notes; ++i) 1726 for (i = 1; i < info->thread_notes; ++i)
@@ -1806,6 +1807,7 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1806 1807
1807struct elf_note_info { 1808struct elf_note_info {
1808 struct memelfnote *notes; 1809 struct memelfnote *notes;
1810 struct memelfnote *notes_files;
1809 struct elf_prstatus *prstatus; /* NT_PRSTATUS */ 1811 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1810 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */ 1812 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
1811 struct list_head thread_list; 1813 struct list_head thread_list;
@@ -1896,9 +1898,12 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1896 1898
1897 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo); 1899 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1898 fill_auxv_note(info->notes + 3, current->mm); 1900 fill_auxv_note(info->notes + 3, current->mm);
1899 fill_files_note(info->notes + 4); 1901 info->numnote = 4;
1900 1902
1901 info->numnote = 5; 1903 if (fill_files_note(info->notes + info->numnote) == 0) {
1904 info->notes_files = info->notes + info->numnote;
1905 info->numnote++;
1906 }
1902 1907
1903 /* Try to dump the FPU. */ 1908 /* Try to dump the FPU. */
1904 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, 1909 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
@@ -1960,8 +1965,9 @@ static void free_note_info(struct elf_note_info *info)
1960 kfree(list_entry(tmp, struct elf_thread_status, list)); 1965 kfree(list_entry(tmp, struct elf_thread_status, list));
1961 } 1966 }
1962 1967
1963 /* Free data allocated by fill_files_note(): */ 1968 /* Free data possibly allocated by fill_files_note(): */
1964 vfree(info->notes[4].data); 1969 if (info->notes_files)
1970 vfree(info->notes_files->data);
1965 1971
1966 kfree(info->prstatus); 1972 kfree(info->prstatus);
1967 kfree(info->psinfo); 1973 kfree(info->psinfo);
@@ -2044,7 +2050,7 @@ static int elf_core_dump(struct coredump_params *cprm)
2044 struct vm_area_struct *vma, *gate_vma; 2050 struct vm_area_struct *vma, *gate_vma;
2045 struct elfhdr *elf = NULL; 2051 struct elfhdr *elf = NULL;
2046 loff_t offset = 0, dataoff, foffset; 2052 loff_t offset = 0, dataoff, foffset;
2047 struct elf_note_info info; 2053 struct elf_note_info info = { };
2048 struct elf_phdr *phdr4note = NULL; 2054 struct elf_phdr *phdr4note = NULL;
2049 struct elf_shdr *shdr4extnum = NULL; 2055 struct elf_shdr *shdr4extnum = NULL;
2050 Elf_Half e_phnum; 2056 Elf_Half e_phnum;
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 60250847929f..fc60b31453ee 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -735,7 +735,7 @@ void bioset_integrity_free(struct bio_set *bs)
735 mempool_destroy(bs->bio_integrity_pool); 735 mempool_destroy(bs->bio_integrity_pool);
736 736
737 if (bs->bvec_integrity_pool) 737 if (bs->bvec_integrity_pool)
738 mempool_destroy(bs->bio_integrity_pool); 738 mempool_destroy(bs->bvec_integrity_pool);
739} 739}
740EXPORT_SYMBOL(bioset_integrity_free); 740EXPORT_SYMBOL(bioset_integrity_free);
741 741
diff --git a/fs/bio.c b/fs/bio.c
index b3b20ed9510e..ea5035da4d9a 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -917,8 +917,8 @@ void bio_copy_data(struct bio *dst, struct bio *src)
917 src_p = kmap_atomic(src_bv->bv_page); 917 src_p = kmap_atomic(src_bv->bv_page);
918 dst_p = kmap_atomic(dst_bv->bv_page); 918 dst_p = kmap_atomic(dst_bv->bv_page);
919 919
920 memcpy(dst_p + dst_bv->bv_offset, 920 memcpy(dst_p + dst_offset,
921 src_p + src_bv->bv_offset, 921 src_p + src_offset,
922 bytes); 922 bytes);
923 923
924 kunmap_atomic(dst_p); 924 kunmap_atomic(dst_p);
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 58b7d14b08ee..08cc08f037a6 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -107,7 +107,8 @@ static void check_idle_worker(struct btrfs_worker_thread *worker)
107 worker->idle = 1; 107 worker->idle = 1;
108 108
109 /* the list may be empty if the worker is just starting */ 109 /* the list may be empty if the worker is just starting */
110 if (!list_empty(&worker->worker_list)) { 110 if (!list_empty(&worker->worker_list) &&
111 !worker->workers->stopping) {
111 list_move(&worker->worker_list, 112 list_move(&worker->worker_list,
112 &worker->workers->idle_list); 113 &worker->workers->idle_list);
113 } 114 }
@@ -127,7 +128,8 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
127 spin_lock_irqsave(&worker->workers->lock, flags); 128 spin_lock_irqsave(&worker->workers->lock, flags);
128 worker->idle = 0; 129 worker->idle = 0;
129 130
130 if (!list_empty(&worker->worker_list)) { 131 if (!list_empty(&worker->worker_list) &&
132 !worker->workers->stopping) {
131 list_move_tail(&worker->worker_list, 133 list_move_tail(&worker->worker_list,
132 &worker->workers->worker_list); 134 &worker->workers->worker_list);
133 } 135 }
@@ -412,6 +414,7 @@ void btrfs_stop_workers(struct btrfs_workers *workers)
412 int can_stop; 414 int can_stop;
413 415
414 spin_lock_irq(&workers->lock); 416 spin_lock_irq(&workers->lock);
417 workers->stopping = 1;
415 list_splice_init(&workers->idle_list, &workers->worker_list); 418 list_splice_init(&workers->idle_list, &workers->worker_list);
416 while (!list_empty(&workers->worker_list)) { 419 while (!list_empty(&workers->worker_list)) {
417 cur = workers->worker_list.next; 420 cur = workers->worker_list.next;
@@ -455,6 +458,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
455 workers->ordered = 0; 458 workers->ordered = 0;
456 workers->atomic_start_pending = 0; 459 workers->atomic_start_pending = 0;
457 workers->atomic_worker_start = async_helper; 460 workers->atomic_worker_start = async_helper;
461 workers->stopping = 0;
458} 462}
459 463
460/* 464/*
@@ -480,15 +484,19 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
480 atomic_set(&worker->num_pending, 0); 484 atomic_set(&worker->num_pending, 0);
481 atomic_set(&worker->refs, 1); 485 atomic_set(&worker->refs, 1);
482 worker->workers = workers; 486 worker->workers = workers;
483 worker->task = kthread_run(worker_loop, worker, 487 worker->task = kthread_create(worker_loop, worker,
484 "btrfs-%s-%d", workers->name, 488 "btrfs-%s-%d", workers->name,
485 workers->num_workers + 1); 489 workers->num_workers + 1);
486 if (IS_ERR(worker->task)) { 490 if (IS_ERR(worker->task)) {
487 ret = PTR_ERR(worker->task); 491 ret = PTR_ERR(worker->task);
488 kfree(worker);
489 goto fail; 492 goto fail;
490 } 493 }
494
491 spin_lock_irq(&workers->lock); 495 spin_lock_irq(&workers->lock);
496 if (workers->stopping) {
497 spin_unlock_irq(&workers->lock);
498 goto fail_kthread;
499 }
492 list_add_tail(&worker->worker_list, &workers->idle_list); 500 list_add_tail(&worker->worker_list, &workers->idle_list);
493 worker->idle = 1; 501 worker->idle = 1;
494 workers->num_workers++; 502 workers->num_workers++;
@@ -496,8 +504,13 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
496 WARN_ON(workers->num_workers_starting < 0); 504 WARN_ON(workers->num_workers_starting < 0);
497 spin_unlock_irq(&workers->lock); 505 spin_unlock_irq(&workers->lock);
498 506
507 wake_up_process(worker->task);
499 return 0; 508 return 0;
509
510fail_kthread:
511 kthread_stop(worker->task);
500fail: 512fail:
513 kfree(worker);
501 spin_lock_irq(&workers->lock); 514 spin_lock_irq(&workers->lock);
502 workers->num_workers_starting--; 515 workers->num_workers_starting--;
503 spin_unlock_irq(&workers->lock); 516 spin_unlock_irq(&workers->lock);
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 063698b90ce2..1f26792683ed 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -107,6 +107,8 @@ struct btrfs_workers {
107 107
108 /* extra name for this worker, used for current->name */ 108 /* extra name for this worker, used for current->name */
109 char *name; 109 char *name;
110
111 int stopping;
110}; 112};
111 113
112void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); 114void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d0ae226926ee..71f074e1870b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -213,7 +213,10 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
213static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) 213static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
214{ 214{
215 if (BTRFS_I(inode)->logged_trans == generation && 215 if (BTRFS_I(inode)->logged_trans == generation &&
216 BTRFS_I(inode)->last_sub_trans <= BTRFS_I(inode)->last_log_commit) 216 BTRFS_I(inode)->last_sub_trans <=
217 BTRFS_I(inode)->last_log_commit &&
218 BTRFS_I(inode)->last_sub_trans <=
219 BTRFS_I(inode)->root->last_log_commit)
217 return 1; 220 return 1;
218 return 0; 221 return 0;
219} 222}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 64346721173f..61b5bcd57b7e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1005,8 +1005,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1005 return ret; 1005 return ret;
1006 } 1006 }
1007 1007
1008 if (root->ref_cows) 1008 if (root->ref_cows) {
1009 btrfs_reloc_cow_block(trans, root, buf, cow); 1009 ret = btrfs_reloc_cow_block(trans, root, buf, cow);
1010 if (ret)
1011 return ret;
1012 }
1010 1013
1011 if (buf == root->node) { 1014 if (buf == root->node) {
1012 WARN_ON(parent && parent != buf); 1015 WARN_ON(parent && parent != buf);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3c1da6f98a4d..0506f40ede83 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1118,15 +1118,6 @@ struct btrfs_space_info {
1118 */ 1118 */
1119 struct percpu_counter total_bytes_pinned; 1119 struct percpu_counter total_bytes_pinned;
1120 1120
1121 /*
1122 * we bump reservation progress every time we decrement
1123 * bytes_reserved. This way people waiting for reservations
1124 * know something good has happened and they can check
1125 * for progress. The number here isn't to be trusted, it
1126 * just shows reclaim activity
1127 */
1128 unsigned long reservation_progress;
1129
1130 unsigned int full:1; /* indicates that we cannot allocate any more 1121 unsigned int full:1; /* indicates that we cannot allocate any more
1131 chunks for this space */ 1122 chunks for this space */
1132 unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ 1123 unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
@@ -3135,7 +3126,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
3135 unsigned num_items) 3126 unsigned num_items)
3136{ 3127{
3137 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * 3128 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3138 3 * num_items; 3129 2 * num_items;
3139} 3130}
3140 3131
3141/* 3132/*
@@ -3939,9 +3930,9 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
3939 struct btrfs_root *root); 3930 struct btrfs_root *root);
3940int btrfs_recover_relocation(struct btrfs_root *root); 3931int btrfs_recover_relocation(struct btrfs_root *root);
3941int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); 3932int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
3942void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, 3933int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
3943 struct btrfs_root *root, struct extent_buffer *buf, 3934 struct btrfs_root *root, struct extent_buffer *buf,
3944 struct extent_buffer *cow); 3935 struct extent_buffer *cow);
3945void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, 3936void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
3946 struct btrfs_pending_snapshot *pending, 3937 struct btrfs_pending_snapshot *pending,
3947 u64 *bytes_to_reserve); 3938 u64 *bytes_to_reserve);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index a64435359385..9efb94e95858 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -400,7 +400,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
400 args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; 400 args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
401 btrfs_dev_replace_unlock(dev_replace); 401 btrfs_dev_replace_unlock(dev_replace);
402 402
403 btrfs_wait_all_ordered_extents(root->fs_info, 0); 403 btrfs_wait_all_ordered_extents(root->fs_info);
404 404
405 /* force writing the updated state information to disk */ 405 /* force writing the updated state information to disk */
406 trans = btrfs_start_transaction(root, 0); 406 trans = btrfs_start_transaction(root, 0);
@@ -475,7 +475,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
475 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); 475 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
476 return ret; 476 return ret;
477 } 477 }
478 btrfs_wait_all_ordered_extents(root->fs_info, 0); 478 btrfs_wait_all_ordered_extents(root->fs_info);
479 479
480 trans = btrfs_start_transaction(root, 0); 480 trans = btrfs_start_transaction(root, 0);
481 if (IS_ERR(trans)) { 481 if (IS_ERR(trans)) {
@@ -535,10 +535,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
535 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); 535 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
536 536
537 btrfs_rm_dev_replace_srcdev(fs_info, src_device); 537 btrfs_rm_dev_replace_srcdev(fs_info, src_device);
538 if (src_device->bdev) { 538
539 /* zero out the old super */
540 btrfs_scratch_superblock(src_device);
541 }
542 /* 539 /*
543 * this is again a consistent state where no dev_replace procedure 540 * this is again a consistent state where no dev_replace procedure
544 * is running, the target device is part of the filesystem, the 541 * is running, the target device is part of the filesystem, the
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4cbb00af92ff..4ae17ed13b32 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -157,6 +157,7 @@ static struct btrfs_lockdep_keyset {
157 { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" }, 157 { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
158 { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, 158 { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
159 { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, 159 { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
160 { .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
160 { .id = 0, .name_stem = "tree" }, 161 { .id = 0, .name_stem = "tree" },
161}; 162};
162 163
@@ -3415,6 +3416,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3415 if (total_errors > max_errors) { 3416 if (total_errors > max_errors) {
3416 printk(KERN_ERR "btrfs: %d errors while writing supers\n", 3417 printk(KERN_ERR "btrfs: %d errors while writing supers\n",
3417 total_errors); 3418 total_errors);
3419 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
3418 3420
3419 /* FUA is masked off if unsupported and can't be the reason */ 3421 /* FUA is masked off if unsupported and can't be the reason */
3420 btrfs_error(root->fs_info, -EIO, 3422 btrfs_error(root->fs_info, -EIO,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cfb3cf711b34..d58bef130a41 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3925,7 +3925,6 @@ static int can_overcommit(struct btrfs_root *root,
3925 u64 space_size; 3925 u64 space_size;
3926 u64 avail; 3926 u64 avail;
3927 u64 used; 3927 u64 used;
3928 u64 to_add;
3929 3928
3930 used = space_info->bytes_used + space_info->bytes_reserved + 3929 used = space_info->bytes_used + space_info->bytes_reserved +
3931 space_info->bytes_pinned + space_info->bytes_readonly; 3930 space_info->bytes_pinned + space_info->bytes_readonly;
@@ -3959,25 +3958,17 @@ static int can_overcommit(struct btrfs_root *root,
3959 BTRFS_BLOCK_GROUP_RAID10)) 3958 BTRFS_BLOCK_GROUP_RAID10))
3960 avail >>= 1; 3959 avail >>= 1;
3961 3960
3962 to_add = space_info->total_bytes;
3963
3964 /* 3961 /*
3965 * If we aren't flushing all things, let us overcommit up to 3962 * If we aren't flushing all things, let us overcommit up to
3966 * 1/2th of the space. If we can flush, don't let us overcommit 3963 * 1/2th of the space. If we can flush, don't let us overcommit
3967 * too much, let it overcommit up to 1/8 of the space. 3964 * too much, let it overcommit up to 1/8 of the space.
3968 */ 3965 */
3969 if (flush == BTRFS_RESERVE_FLUSH_ALL) 3966 if (flush == BTRFS_RESERVE_FLUSH_ALL)
3970 to_add >>= 3; 3967 avail >>= 3;
3971 else 3968 else
3972 to_add >>= 1; 3969 avail >>= 1;
3973
3974 /*
3975 * Limit the overcommit to the amount of free space we could possibly
3976 * allocate for chunks.
3977 */
3978 to_add = min(avail, to_add);
3979 3970
3980 if (used + bytes < space_info->total_bytes + to_add) 3971 if (used + bytes < space_info->total_bytes + avail)
3981 return 1; 3972 return 1;
3982 return 0; 3973 return 0;
3983} 3974}
@@ -4000,7 +3991,7 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
4000 */ 3991 */
4001 btrfs_start_all_delalloc_inodes(root->fs_info, 0); 3992 btrfs_start_all_delalloc_inodes(root->fs_info, 0);
4002 if (!current->journal_info) 3993 if (!current->journal_info)
4003 btrfs_wait_all_ordered_extents(root->fs_info, 0); 3994 btrfs_wait_all_ordered_extents(root->fs_info);
4004 } 3995 }
4005} 3996}
4006 3997
@@ -4030,7 +4021,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
4030 if (delalloc_bytes == 0) { 4021 if (delalloc_bytes == 0) {
4031 if (trans) 4022 if (trans)
4032 return; 4023 return;
4033 btrfs_wait_all_ordered_extents(root->fs_info, 0); 4024 btrfs_wait_all_ordered_extents(root->fs_info);
4034 return; 4025 return;
4035 } 4026 }
4036 4027
@@ -4058,7 +4049,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
4058 4049
4059 loops++; 4050 loops++;
4060 if (wait_ordered && !trans) { 4051 if (wait_ordered && !trans) {
4061 btrfs_wait_all_ordered_extents(root->fs_info, 0); 4052 btrfs_wait_all_ordered_extents(root->fs_info);
4062 } else { 4053 } else {
4063 time_left = schedule_timeout_killable(1); 4054 time_left = schedule_timeout_killable(1);
4064 if (time_left) 4055 if (time_left)
@@ -4465,7 +4456,6 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
4465 space_info->bytes_may_use -= num_bytes; 4456 space_info->bytes_may_use -= num_bytes;
4466 trace_btrfs_space_reservation(fs_info, "space_info", 4457 trace_btrfs_space_reservation(fs_info, "space_info",
4467 space_info->flags, num_bytes, 0); 4458 space_info->flags, num_bytes, 0);
4468 space_info->reservation_progress++;
4469 spin_unlock(&space_info->lock); 4459 spin_unlock(&space_info->lock);
4470 } 4460 }
4471 } 4461 }
@@ -4666,7 +4656,6 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
4666 sinfo->bytes_may_use -= num_bytes; 4656 sinfo->bytes_may_use -= num_bytes;
4667 trace_btrfs_space_reservation(fs_info, "space_info", 4657 trace_btrfs_space_reservation(fs_info, "space_info",
4668 sinfo->flags, num_bytes, 0); 4658 sinfo->flags, num_bytes, 0);
4669 sinfo->reservation_progress++;
4670 block_rsv->reserved = block_rsv->size; 4659 block_rsv->reserved = block_rsv->size;
4671 block_rsv->full = 1; 4660 block_rsv->full = 1;
4672 } 4661 }
@@ -5446,7 +5435,6 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
5446 space_info->bytes_readonly += num_bytes; 5435 space_info->bytes_readonly += num_bytes;
5447 cache->reserved -= num_bytes; 5436 cache->reserved -= num_bytes;
5448 space_info->bytes_reserved -= num_bytes; 5437 space_info->bytes_reserved -= num_bytes;
5449 space_info->reservation_progress++;
5450 } 5438 }
5451 spin_unlock(&cache->lock); 5439 spin_unlock(&cache->lock);
5452 spin_unlock(&space_info->lock); 5440 spin_unlock(&space_info->lock);
@@ -6117,10 +6105,13 @@ enum btrfs_loop_type {
6117/* 6105/*
6118 * walks the btree of allocated extents and find a hole of a given size. 6106 * walks the btree of allocated extents and find a hole of a given size.
6119 * The key ins is changed to record the hole: 6107 * The key ins is changed to record the hole:
6120 * ins->objectid == block start 6108 * ins->objectid == start position
6121 * ins->flags = BTRFS_EXTENT_ITEM_KEY 6109 * ins->flags = BTRFS_EXTENT_ITEM_KEY
6122 * ins->offset == number of blocks 6110 * ins->offset == the size of the hole.
6123 * Any available blocks before search_start are skipped. 6111 * Any available blocks before search_start are skipped.
6112 *
6113 * If there is no suitable free space, we will record the max size of
6114 * the free space extent currently.
6124 */ 6115 */
6125static noinline int find_free_extent(struct btrfs_root *orig_root, 6116static noinline int find_free_extent(struct btrfs_root *orig_root,
6126 u64 num_bytes, u64 empty_size, 6117 u64 num_bytes, u64 empty_size,
@@ -6133,6 +6124,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6133 struct btrfs_block_group_cache *block_group = NULL; 6124 struct btrfs_block_group_cache *block_group = NULL;
6134 struct btrfs_block_group_cache *used_block_group; 6125 struct btrfs_block_group_cache *used_block_group;
6135 u64 search_start = 0; 6126 u64 search_start = 0;
6127 u64 max_extent_size = 0;
6136 int empty_cluster = 2 * 1024 * 1024; 6128 int empty_cluster = 2 * 1024 * 1024;
6137 struct btrfs_space_info *space_info; 6129 struct btrfs_space_info *space_info;
6138 int loop = 0; 6130 int loop = 0;
@@ -6292,7 +6284,10 @@ have_block_group:
6292 btrfs_get_block_group(used_block_group); 6284 btrfs_get_block_group(used_block_group);
6293 6285
6294 offset = btrfs_alloc_from_cluster(used_block_group, 6286 offset = btrfs_alloc_from_cluster(used_block_group,
6295 last_ptr, num_bytes, used_block_group->key.objectid); 6287 last_ptr,
6288 num_bytes,
6289 used_block_group->key.objectid,
6290 &max_extent_size);
6296 if (offset) { 6291 if (offset) {
6297 /* we have a block, we're done */ 6292 /* we have a block, we're done */
6298 spin_unlock(&last_ptr->refill_lock); 6293 spin_unlock(&last_ptr->refill_lock);
@@ -6355,8 +6350,10 @@ refill_cluster:
6355 * cluster 6350 * cluster
6356 */ 6351 */
6357 offset = btrfs_alloc_from_cluster(block_group, 6352 offset = btrfs_alloc_from_cluster(block_group,
6358 last_ptr, num_bytes, 6353 last_ptr,
6359 search_start); 6354 num_bytes,
6355 search_start,
6356 &max_extent_size);
6360 if (offset) { 6357 if (offset) {
6361 /* we found one, proceed */ 6358 /* we found one, proceed */
6362 spin_unlock(&last_ptr->refill_lock); 6359 spin_unlock(&last_ptr->refill_lock);
@@ -6391,13 +6388,18 @@ unclustered_alloc:
6391 if (cached && 6388 if (cached &&
6392 block_group->free_space_ctl->free_space < 6389 block_group->free_space_ctl->free_space <
6393 num_bytes + empty_cluster + empty_size) { 6390 num_bytes + empty_cluster + empty_size) {
6391 if (block_group->free_space_ctl->free_space >
6392 max_extent_size)
6393 max_extent_size =
6394 block_group->free_space_ctl->free_space;
6394 spin_unlock(&block_group->free_space_ctl->tree_lock); 6395 spin_unlock(&block_group->free_space_ctl->tree_lock);
6395 goto loop; 6396 goto loop;
6396 } 6397 }
6397 spin_unlock(&block_group->free_space_ctl->tree_lock); 6398 spin_unlock(&block_group->free_space_ctl->tree_lock);
6398 6399
6399 offset = btrfs_find_space_for_alloc(block_group, search_start, 6400 offset = btrfs_find_space_for_alloc(block_group, search_start,
6400 num_bytes, empty_size); 6401 num_bytes, empty_size,
6402 &max_extent_size);
6401 /* 6403 /*
6402 * If we didn't find a chunk, and we haven't failed on this 6404 * If we didn't find a chunk, and we haven't failed on this
6403 * block group before, and this block group is in the middle of 6405 * block group before, and this block group is in the middle of
@@ -6515,7 +6517,8 @@ loop:
6515 ret = 0; 6517 ret = 0;
6516 } 6518 }
6517out: 6519out:
6518 6520 if (ret == -ENOSPC)
6521 ins->offset = max_extent_size;
6519 return ret; 6522 return ret;
6520} 6523}
6521 6524
@@ -6573,8 +6576,8 @@ again:
6573 flags); 6576 flags);
6574 6577
6575 if (ret == -ENOSPC) { 6578 if (ret == -ENOSPC) {
6576 if (!final_tried) { 6579 if (!final_tried && ins->offset) {
6577 num_bytes = num_bytes >> 1; 6580 num_bytes = min(num_bytes >> 1, ins->offset);
6578 num_bytes = round_down(num_bytes, root->sectorsize); 6581 num_bytes = round_down(num_bytes, root->sectorsize);
6579 num_bytes = max(num_bytes, min_alloc_size); 6582 num_bytes = max(num_bytes, min_alloc_size);
6580 if (num_bytes == min_alloc_size) 6583 if (num_bytes == min_alloc_size)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 09582b81640c..22bda32acb89 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -145,8 +145,16 @@ int __init extent_io_init(void)
145 offsetof(struct btrfs_io_bio, bio)); 145 offsetof(struct btrfs_io_bio, bio));
146 if (!btrfs_bioset) 146 if (!btrfs_bioset)
147 goto free_buffer_cache; 147 goto free_buffer_cache;
148
149 if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
150 goto free_bioset;
151
148 return 0; 152 return 0;
149 153
154free_bioset:
155 bioset_free(btrfs_bioset);
156 btrfs_bioset = NULL;
157
150free_buffer_cache: 158free_buffer_cache:
151 kmem_cache_destroy(extent_buffer_cache); 159 kmem_cache_destroy(extent_buffer_cache);
152 extent_buffer_cache = NULL; 160 extent_buffer_cache = NULL;
@@ -1481,10 +1489,12 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1481 *end = state->end; 1489 *end = state->end;
1482 cur_start = state->end + 1; 1490 cur_start = state->end + 1;
1483 node = rb_next(node); 1491 node = rb_next(node);
1484 if (!node)
1485 break;
1486 total_bytes += state->end - state->start + 1; 1492 total_bytes += state->end - state->start + 1;
1487 if (total_bytes >= max_bytes) 1493 if (total_bytes >= max_bytes) {
1494 *end = *start + max_bytes - 1;
1495 break;
1496 }
1497 if (!node)
1488 break; 1498 break;
1489 } 1499 }
1490out: 1500out:
@@ -1612,7 +1622,7 @@ again:
1612 *start = delalloc_start; 1622 *start = delalloc_start;
1613 *end = delalloc_end; 1623 *end = delalloc_end;
1614 free_extent_state(cached_state); 1624 free_extent_state(cached_state);
1615 return found; 1625 return 0;
1616 } 1626 }
1617 1627
1618 /* 1628 /*
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index bc5072b2db53..72da4df53c9a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1859,8 +1859,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1859 1859
1860 ret = btrfs_log_dentry_safe(trans, root, dentry); 1860 ret = btrfs_log_dentry_safe(trans, root, dentry);
1861 if (ret < 0) { 1861 if (ret < 0) {
1862 mutex_unlock(&inode->i_mutex); 1862 /* Fallthrough and commit/free transaction. */
1863 goto out; 1863 ret = 1;
1864 } 1864 }
1865 1865
1866 /* we've logged all the items and now have a consistent 1866 /* we've logged all the items and now have a consistent
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3f0ddfce96e6..b4f9904c4c6b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1431,13 +1431,19 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1431 ctl->free_space += bytes; 1431 ctl->free_space += bytes;
1432} 1432}
1433 1433
1434/*
1435 * If we can not find suitable extent, we will use bytes to record
1436 * the size of the max extent.
1437 */
1434static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1438static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1435 struct btrfs_free_space *bitmap_info, u64 *offset, 1439 struct btrfs_free_space *bitmap_info, u64 *offset,
1436 u64 *bytes) 1440 u64 *bytes)
1437{ 1441{
1438 unsigned long found_bits = 0; 1442 unsigned long found_bits = 0;
1443 unsigned long max_bits = 0;
1439 unsigned long bits, i; 1444 unsigned long bits, i;
1440 unsigned long next_zero; 1445 unsigned long next_zero;
1446 unsigned long extent_bits;
1441 1447
1442 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1448 i = offset_to_bit(bitmap_info->offset, ctl->unit,
1443 max_t(u64, *offset, bitmap_info->offset)); 1449 max_t(u64, *offset, bitmap_info->offset));
@@ -1446,9 +1452,12 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1446 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1452 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
1447 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1453 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1448 BITS_PER_BITMAP, i); 1454 BITS_PER_BITMAP, i);
1449 if ((next_zero - i) >= bits) { 1455 extent_bits = next_zero - i;
1450 found_bits = next_zero - i; 1456 if (extent_bits >= bits) {
1457 found_bits = extent_bits;
1451 break; 1458 break;
1459 } else if (extent_bits > max_bits) {
1460 max_bits = extent_bits;
1452 } 1461 }
1453 i = next_zero; 1462 i = next_zero;
1454 } 1463 }
@@ -1459,38 +1468,41 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1459 return 0; 1468 return 0;
1460 } 1469 }
1461 1470
1471 *bytes = (u64)(max_bits) * ctl->unit;
1462 return -1; 1472 return -1;
1463} 1473}
1464 1474
1475/* Cache the size of the max extent in bytes */
1465static struct btrfs_free_space * 1476static struct btrfs_free_space *
1466find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1477find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1467 unsigned long align) 1478 unsigned long align, u64 *max_extent_size)
1468{ 1479{
1469 struct btrfs_free_space *entry; 1480 struct btrfs_free_space *entry;
1470 struct rb_node *node; 1481 struct rb_node *node;
1471 u64 ctl_off;
1472 u64 tmp; 1482 u64 tmp;
1473 u64 align_off; 1483 u64 align_off;
1474 int ret; 1484 int ret;
1475 1485
1476 if (!ctl->free_space_offset.rb_node) 1486 if (!ctl->free_space_offset.rb_node)
1477 return NULL; 1487 goto out;
1478 1488
1479 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1489 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1480 if (!entry) 1490 if (!entry)
1481 return NULL; 1491 goto out;
1482 1492
1483 for (node = &entry->offset_index; node; node = rb_next(node)) { 1493 for (node = &entry->offset_index; node; node = rb_next(node)) {
1484 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1494 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1485 if (entry->bytes < *bytes) 1495 if (entry->bytes < *bytes) {
1496 if (entry->bytes > *max_extent_size)
1497 *max_extent_size = entry->bytes;
1486 continue; 1498 continue;
1499 }
1487 1500
1488 /* make sure the space returned is big enough 1501 /* make sure the space returned is big enough
1489 * to match our requested alignment 1502 * to match our requested alignment
1490 */ 1503 */
1491 if (*bytes >= align) { 1504 if (*bytes >= align) {
1492 ctl_off = entry->offset - ctl->start; 1505 tmp = entry->offset - ctl->start + align - 1;
1493 tmp = ctl_off + align - 1;;
1494 do_div(tmp, align); 1506 do_div(tmp, align);
1495 tmp = tmp * align + ctl->start; 1507 tmp = tmp * align + ctl->start;
1496 align_off = tmp - entry->offset; 1508 align_off = tmp - entry->offset;
@@ -1499,14 +1511,22 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1499 tmp = entry->offset; 1511 tmp = entry->offset;
1500 } 1512 }
1501 1513
1502 if (entry->bytes < *bytes + align_off) 1514 if (entry->bytes < *bytes + align_off) {
1515 if (entry->bytes > *max_extent_size)
1516 *max_extent_size = entry->bytes;
1503 continue; 1517 continue;
1518 }
1504 1519
1505 if (entry->bitmap) { 1520 if (entry->bitmap) {
1506 ret = search_bitmap(ctl, entry, &tmp, bytes); 1521 u64 size = *bytes;
1522
1523 ret = search_bitmap(ctl, entry, &tmp, &size);
1507 if (!ret) { 1524 if (!ret) {
1508 *offset = tmp; 1525 *offset = tmp;
1526 *bytes = size;
1509 return entry; 1527 return entry;
1528 } else if (size > *max_extent_size) {
1529 *max_extent_size = size;
1510 } 1530 }
1511 continue; 1531 continue;
1512 } 1532 }
@@ -1515,7 +1535,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1515 *bytes = entry->bytes - align_off; 1535 *bytes = entry->bytes - align_off;
1516 return entry; 1536 return entry;
1517 } 1537 }
1518 1538out:
1519 return NULL; 1539 return NULL;
1520} 1540}
1521 1541
@@ -2116,7 +2136,8 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2116} 2136}
2117 2137
2118u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2138u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2119 u64 offset, u64 bytes, u64 empty_size) 2139 u64 offset, u64 bytes, u64 empty_size,
2140 u64 *max_extent_size)
2120{ 2141{
2121 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2142 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2122 struct btrfs_free_space *entry = NULL; 2143 struct btrfs_free_space *entry = NULL;
@@ -2127,7 +2148,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2127 2148
2128 spin_lock(&ctl->tree_lock); 2149 spin_lock(&ctl->tree_lock);
2129 entry = find_free_space(ctl, &offset, &bytes_search, 2150 entry = find_free_space(ctl, &offset, &bytes_search,
2130 block_group->full_stripe_len); 2151 block_group->full_stripe_len, max_extent_size);
2131 if (!entry) 2152 if (!entry)
2132 goto out; 2153 goto out;
2133 2154
@@ -2137,7 +2158,6 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2137 if (!entry->bytes) 2158 if (!entry->bytes)
2138 free_bitmap(ctl, entry); 2159 free_bitmap(ctl, entry);
2139 } else { 2160 } else {
2140
2141 unlink_free_space(ctl, entry); 2161 unlink_free_space(ctl, entry);
2142 align_gap_len = offset - entry->offset; 2162 align_gap_len = offset - entry->offset;
2143 align_gap = entry->offset; 2163 align_gap = entry->offset;
@@ -2151,7 +2171,6 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2151 else 2171 else
2152 link_free_space(ctl, entry); 2172 link_free_space(ctl, entry);
2153 } 2173 }
2154
2155out: 2174out:
2156 spin_unlock(&ctl->tree_lock); 2175 spin_unlock(&ctl->tree_lock);
2157 2176
@@ -2206,7 +2225,8 @@ int btrfs_return_cluster_to_free_space(
2206static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2225static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2207 struct btrfs_free_cluster *cluster, 2226 struct btrfs_free_cluster *cluster,
2208 struct btrfs_free_space *entry, 2227 struct btrfs_free_space *entry,
2209 u64 bytes, u64 min_start) 2228 u64 bytes, u64 min_start,
2229 u64 *max_extent_size)
2210{ 2230{
2211 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2231 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2212 int err; 2232 int err;
@@ -2218,8 +2238,11 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2218 search_bytes = bytes; 2238 search_bytes = bytes;
2219 2239
2220 err = search_bitmap(ctl, entry, &search_start, &search_bytes); 2240 err = search_bitmap(ctl, entry, &search_start, &search_bytes);
2221 if (err) 2241 if (err) {
2242 if (search_bytes > *max_extent_size)
2243 *max_extent_size = search_bytes;
2222 return 0; 2244 return 0;
2245 }
2223 2246
2224 ret = search_start; 2247 ret = search_start;
2225 __bitmap_clear_bits(ctl, entry, ret, bytes); 2248 __bitmap_clear_bits(ctl, entry, ret, bytes);
@@ -2234,7 +2257,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2234 */ 2257 */
2235u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2258u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2236 struct btrfs_free_cluster *cluster, u64 bytes, 2259 struct btrfs_free_cluster *cluster, u64 bytes,
2237 u64 min_start) 2260 u64 min_start, u64 *max_extent_size)
2238{ 2261{
2239 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2262 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2240 struct btrfs_free_space *entry = NULL; 2263 struct btrfs_free_space *entry = NULL;
@@ -2254,6 +2277,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2254 2277
2255 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2278 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2256 while(1) { 2279 while(1) {
2280 if (entry->bytes < bytes && entry->bytes > *max_extent_size)
2281 *max_extent_size = entry->bytes;
2282
2257 if (entry->bytes < bytes || 2283 if (entry->bytes < bytes ||
2258 (!entry->bitmap && entry->offset < min_start)) { 2284 (!entry->bitmap && entry->offset < min_start)) {
2259 node = rb_next(&entry->offset_index); 2285 node = rb_next(&entry->offset_index);
@@ -2267,7 +2293,8 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2267 if (entry->bitmap) { 2293 if (entry->bitmap) {
2268 ret = btrfs_alloc_from_bitmap(block_group, 2294 ret = btrfs_alloc_from_bitmap(block_group,
2269 cluster, entry, bytes, 2295 cluster, entry, bytes,
2270 cluster->window_start); 2296 cluster->window_start,
2297 max_extent_size);
2271 if (ret == 0) { 2298 if (ret == 0) {
2272 node = rb_next(&entry->offset_index); 2299 node = rb_next(&entry->offset_index);
2273 if (!node) 2300 if (!node)
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index c74904167476..e737f92cf6d0 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -94,7 +94,8 @@ void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
94void btrfs_remove_free_space_cache(struct btrfs_block_group_cache 94void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
95 *block_group); 95 *block_group);
96u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 96u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
97 u64 offset, u64 bytes, u64 empty_size); 97 u64 offset, u64 bytes, u64 empty_size,
98 u64 *max_extent_size);
98u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root); 99u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
99void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 100void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
100 u64 bytes); 101 u64 bytes);
@@ -105,7 +106,7 @@ int btrfs_find_space_cluster(struct btrfs_root *root,
105void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster); 106void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
106u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 107u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
107 struct btrfs_free_cluster *cluster, u64 bytes, 108 struct btrfs_free_cluster *cluster, u64 bytes,
108 u64 min_start); 109 u64 min_start, u64 *max_extent_size);
109int btrfs_return_cluster_to_free_space( 110int btrfs_return_cluster_to_free_space(
110 struct btrfs_block_group_cache *block_group, 111 struct btrfs_block_group_cache *block_group,
111 struct btrfs_free_cluster *cluster); 112 struct btrfs_free_cluster *cluster);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f338c5672d58..22ebc13b6c99 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4688,11 +4688,11 @@ static void inode_tree_add(struct inode *inode)
4688 struct btrfs_inode *entry; 4688 struct btrfs_inode *entry;
4689 struct rb_node **p; 4689 struct rb_node **p;
4690 struct rb_node *parent; 4690 struct rb_node *parent;
4691 struct rb_node *new = &BTRFS_I(inode)->rb_node;
4691 u64 ino = btrfs_ino(inode); 4692 u64 ino = btrfs_ino(inode);
4692 4693
4693 if (inode_unhashed(inode)) 4694 if (inode_unhashed(inode))
4694 return; 4695 return;
4695again:
4696 parent = NULL; 4696 parent = NULL;
4697 spin_lock(&root->inode_lock); 4697 spin_lock(&root->inode_lock);
4698 p = &root->inode_tree.rb_node; 4698 p = &root->inode_tree.rb_node;
@@ -4707,14 +4707,14 @@ again:
4707 else { 4707 else {
4708 WARN_ON(!(entry->vfs_inode.i_state & 4708 WARN_ON(!(entry->vfs_inode.i_state &
4709 (I_WILL_FREE | I_FREEING))); 4709 (I_WILL_FREE | I_FREEING)));
4710 rb_erase(parent, &root->inode_tree); 4710 rb_replace_node(parent, new, &root->inode_tree);
4711 RB_CLEAR_NODE(parent); 4711 RB_CLEAR_NODE(parent);
4712 spin_unlock(&root->inode_lock); 4712 spin_unlock(&root->inode_lock);
4713 goto again; 4713 return;
4714 } 4714 }
4715 } 4715 }
4716 rb_link_node(&BTRFS_I(inode)->rb_node, parent, p); 4716 rb_link_node(new, parent, p);
4717 rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree); 4717 rb_insert_color(new, &root->inode_tree);
4718 spin_unlock(&root->inode_lock); 4718 spin_unlock(&root->inode_lock);
4719} 4719}
4720 4720
@@ -8216,6 +8216,10 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8216 8216
8217 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); 8217 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
8218 if (unlikely(!work)) { 8218 if (unlikely(!work)) {
8219 if (delay_iput)
8220 btrfs_add_delayed_iput(inode);
8221 else
8222 iput(inode);
8219 ret = -ENOMEM; 8223 ret = -ENOMEM;
8220 goto out; 8224 goto out;
8221 } 8225 }
@@ -8613,11 +8617,13 @@ static const struct inode_operations btrfs_dir_inode_operations = {
8613 .removexattr = btrfs_removexattr, 8617 .removexattr = btrfs_removexattr,
8614 .permission = btrfs_permission, 8618 .permission = btrfs_permission,
8615 .get_acl = btrfs_get_acl, 8619 .get_acl = btrfs_get_acl,
8620 .update_time = btrfs_update_time,
8616}; 8621};
8617static const struct inode_operations btrfs_dir_ro_inode_operations = { 8622static const struct inode_operations btrfs_dir_ro_inode_operations = {
8618 .lookup = btrfs_lookup, 8623 .lookup = btrfs_lookup,
8619 .permission = btrfs_permission, 8624 .permission = btrfs_permission,
8620 .get_acl = btrfs_get_acl, 8625 .get_acl = btrfs_get_acl,
8626 .update_time = btrfs_update_time,
8621}; 8627};
8622 8628
8623static const struct file_operations btrfs_dir_file_operations = { 8629static const struct file_operations btrfs_dir_file_operations = {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1a5b9462dd9a..9d46f60cb943 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -574,7 +574,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
574 if (ret) 574 if (ret)
575 return ret; 575 return ret;
576 576
577 btrfs_wait_ordered_extents(root, 0); 577 btrfs_wait_ordered_extents(root);
578 578
579 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 579 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
580 if (!pending_snapshot) 580 if (!pending_snapshot)
@@ -2696,9 +2696,9 @@ out_unlock:
2696static long btrfs_ioctl_file_extent_same(struct file *file, 2696static long btrfs_ioctl_file_extent_same(struct file *file,
2697 void __user *argp) 2697 void __user *argp)
2698{ 2698{
2699 struct btrfs_ioctl_same_args *args = argp; 2699 struct btrfs_ioctl_same_args tmp;
2700 struct btrfs_ioctl_same_args same; 2700 struct btrfs_ioctl_same_args *same;
2701 struct btrfs_ioctl_same_extent_info info; 2701 struct btrfs_ioctl_same_extent_info *info;
2702 struct inode *src = file->f_dentry->d_inode; 2702 struct inode *src = file->f_dentry->d_inode;
2703 struct file *dst_file = NULL; 2703 struct file *dst_file = NULL;
2704 struct inode *dst; 2704 struct inode *dst;
@@ -2706,6 +2706,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2706 u64 len; 2706 u64 len;
2707 int i; 2707 int i;
2708 int ret; 2708 int ret;
2709 unsigned long size;
2709 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; 2710 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
2710 bool is_admin = capable(CAP_SYS_ADMIN); 2711 bool is_admin = capable(CAP_SYS_ADMIN);
2711 2712
@@ -2716,15 +2717,30 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2716 if (ret) 2717 if (ret)
2717 return ret; 2718 return ret;
2718 2719
2719 if (copy_from_user(&same, 2720 if (copy_from_user(&tmp,
2720 (struct btrfs_ioctl_same_args __user *)argp, 2721 (struct btrfs_ioctl_same_args __user *)argp,
2721 sizeof(same))) { 2722 sizeof(tmp))) {
2722 ret = -EFAULT; 2723 ret = -EFAULT;
2723 goto out; 2724 goto out;
2724 } 2725 }
2725 2726
2726 off = same.logical_offset; 2727 size = sizeof(tmp) +
2727 len = same.length; 2728 tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info);
2729
2730 same = kmalloc(size, GFP_NOFS);
2731 if (!same) {
2732 ret = -EFAULT;
2733 goto out;
2734 }
2735
2736 if (copy_from_user(same,
2737 (struct btrfs_ioctl_same_args __user *)argp, size)) {
2738 ret = -EFAULT;
2739 goto out;
2740 }
2741
2742 off = same->logical_offset;
2743 len = same->length;
2728 2744
2729 /* 2745 /*
2730 * Limit the total length we will dedupe for each operation. 2746 * Limit the total length we will dedupe for each operation.
@@ -2752,27 +2768,28 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2752 if (!S_ISREG(src->i_mode)) 2768 if (!S_ISREG(src->i_mode))
2753 goto out; 2769 goto out;
2754 2770
2755 ret = 0; 2771 /* pre-format output fields to sane values */
2756 for (i = 0; i < same.dest_count; i++) { 2772 for (i = 0; i < same->dest_count; i++) {
2757 if (copy_from_user(&info, &args->info[i], sizeof(info))) { 2773 same->info[i].bytes_deduped = 0ULL;
2758 ret = -EFAULT; 2774 same->info[i].status = 0;
2759 goto out; 2775 }
2760 }
2761 2776
2762 info.bytes_deduped = 0; 2777 ret = 0;
2778 for (i = 0; i < same->dest_count; i++) {
2779 info = &same->info[i];
2763 2780
2764 dst_file = fget(info.fd); 2781 dst_file = fget(info->fd);
2765 if (!dst_file) { 2782 if (!dst_file) {
2766 info.status = -EBADF; 2783 info->status = -EBADF;
2767 goto next; 2784 goto next;
2768 } 2785 }
2769 2786
2770 if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { 2787 if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
2771 info.status = -EINVAL; 2788 info->status = -EINVAL;
2772 goto next; 2789 goto next;
2773 } 2790 }
2774 2791
2775 info.status = -EXDEV; 2792 info->status = -EXDEV;
2776 if (file->f_path.mnt != dst_file->f_path.mnt) 2793 if (file->f_path.mnt != dst_file->f_path.mnt)
2777 goto next; 2794 goto next;
2778 2795
@@ -2781,32 +2798,29 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2781 goto next; 2798 goto next;
2782 2799
2783 if (S_ISDIR(dst->i_mode)) { 2800 if (S_ISDIR(dst->i_mode)) {
2784 info.status = -EISDIR; 2801 info->status = -EISDIR;
2785 goto next; 2802 goto next;
2786 } 2803 }
2787 2804
2788 if (!S_ISREG(dst->i_mode)) { 2805 if (!S_ISREG(dst->i_mode)) {
2789 info.status = -EACCES; 2806 info->status = -EACCES;
2790 goto next; 2807 goto next;
2791 } 2808 }
2792 2809
2793 info.status = btrfs_extent_same(src, off, len, dst, 2810 info->status = btrfs_extent_same(src, off, len, dst,
2794 info.logical_offset); 2811 info->logical_offset);
2795 if (info.status == 0) 2812 if (info->status == 0)
2796 info.bytes_deduped += len; 2813 info->bytes_deduped += len;
2797 2814
2798next: 2815next:
2799 if (dst_file) 2816 if (dst_file)
2800 fput(dst_file); 2817 fput(dst_file);
2801
2802 if (__put_user_unaligned(info.status, &args->info[i].status) ||
2803 __put_user_unaligned(info.bytes_deduped,
2804 &args->info[i].bytes_deduped)) {
2805 ret = -EFAULT;
2806 goto out;
2807 }
2808 } 2818 }
2809 2819
2820 ret = copy_to_user(argp, same, size);
2821 if (ret)
2822 ret = -EFAULT;
2823
2810out: 2824out:
2811 mnt_drop_write_file(file); 2825 mnt_drop_write_file(file);
2812 return ret; 2826 return ret;
@@ -3310,7 +3324,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
3310 } 3324 }
3311 3325
3312 if (!objectid) 3326 if (!objectid)
3313 objectid = root->root_key.objectid; 3327 objectid = BTRFS_FS_TREE_OBJECTID;
3314 3328
3315 location.objectid = objectid; 3329 location.objectid = objectid;
3316 location.type = BTRFS_ROOT_ITEM_KEY; 3330 location.type = BTRFS_ROOT_ITEM_KEY;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 966b413a33b8..c702cb62f78a 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -563,11 +563,10 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
563 * wait for all the ordered extents in a root. This is done when balancing 563 * wait for all the ordered extents in a root. This is done when balancing
564 * space between drives. 564 * space between drives.
565 */ 565 */
566void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) 566void btrfs_wait_ordered_extents(struct btrfs_root *root)
567{ 567{
568 struct list_head splice, works; 568 struct list_head splice, works;
569 struct btrfs_ordered_extent *ordered, *next; 569 struct btrfs_ordered_extent *ordered, *next;
570 struct inode *inode;
571 570
572 INIT_LIST_HEAD(&splice); 571 INIT_LIST_HEAD(&splice);
573 INIT_LIST_HEAD(&works); 572 INIT_LIST_HEAD(&works);
@@ -580,15 +579,6 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
580 root_extent_list); 579 root_extent_list);
581 list_move_tail(&ordered->root_extent_list, 580 list_move_tail(&ordered->root_extent_list,
582 &root->ordered_extents); 581 &root->ordered_extents);
583 /*
584 * the inode may be getting freed (in sys_unlink path).
585 */
586 inode = igrab(ordered->inode);
587 if (!inode) {
588 cond_resched_lock(&root->ordered_extent_lock);
589 continue;
590 }
591
592 atomic_inc(&ordered->refs); 582 atomic_inc(&ordered->refs);
593 spin_unlock(&root->ordered_extent_lock); 583 spin_unlock(&root->ordered_extent_lock);
594 584
@@ -605,21 +595,13 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
605 list_for_each_entry_safe(ordered, next, &works, work_list) { 595 list_for_each_entry_safe(ordered, next, &works, work_list) {
606 list_del_init(&ordered->work_list); 596 list_del_init(&ordered->work_list);
607 wait_for_completion(&ordered->completion); 597 wait_for_completion(&ordered->completion);
608
609 inode = ordered->inode;
610 btrfs_put_ordered_extent(ordered); 598 btrfs_put_ordered_extent(ordered);
611 if (delay_iput)
612 btrfs_add_delayed_iput(inode);
613 else
614 iput(inode);
615
616 cond_resched(); 599 cond_resched();
617 } 600 }
618 mutex_unlock(&root->fs_info->ordered_operations_mutex); 601 mutex_unlock(&root->fs_info->ordered_operations_mutex);
619} 602}
620 603
621void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info, 604void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info)
622 int delay_iput)
623{ 605{
624 struct btrfs_root *root; 606 struct btrfs_root *root;
625 struct list_head splice; 607 struct list_head splice;
@@ -637,7 +619,7 @@ void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
637 &fs_info->ordered_roots); 619 &fs_info->ordered_roots);
638 spin_unlock(&fs_info->ordered_root_lock); 620 spin_unlock(&fs_info->ordered_root_lock);
639 621
640 btrfs_wait_ordered_extents(root, delay_iput); 622 btrfs_wait_ordered_extents(root);
641 btrfs_put_fs_root(root); 623 btrfs_put_fs_root(root);
642 624
643 spin_lock(&fs_info->ordered_root_lock); 625 spin_lock(&fs_info->ordered_root_lock);
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index d9a5aa097b4f..0c0b35612d7a 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -195,9 +195,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
195void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 195void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
196 struct btrfs_root *root, 196 struct btrfs_root *root,
197 struct inode *inode); 197 struct inode *inode);
198void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput); 198void btrfs_wait_ordered_extents(struct btrfs_root *root);
199void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info, 199void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info);
200 int delay_iput);
201void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); 200void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
202void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); 201void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
203void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); 202void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index aacc2121e87c..a5a26320503f 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1548,7 +1548,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1548 btrfs_file_extent_other_encoding(leaf, fi)); 1548 btrfs_file_extent_other_encoding(leaf, fi));
1549 1549
1550 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { 1550 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1551 ret = 1; 1551 ret = -EINVAL;
1552 goto out; 1552 goto out;
1553 } 1553 }
1554 1554
@@ -1579,7 +1579,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1579 u64 end; 1579 u64 end;
1580 u32 nritems; 1580 u32 nritems;
1581 u32 i; 1581 u32 i;
1582 int ret; 1582 int ret = 0;
1583 int first = 1; 1583 int first = 1;
1584 int dirty = 0; 1584 int dirty = 0;
1585 1585
@@ -1642,11 +1642,13 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1642 1642
1643 ret = get_new_location(rc->data_inode, &new_bytenr, 1643 ret = get_new_location(rc->data_inode, &new_bytenr,
1644 bytenr, num_bytes); 1644 bytenr, num_bytes);
1645 if (ret > 0) { 1645 if (ret) {
1646 WARN_ON(1); 1646 /*
1647 continue; 1647 * Don't have to abort since we've not changed anything
1648 * in the file extent yet.
1649 */
1650 break;
1648 } 1651 }
1649 BUG_ON(ret < 0);
1650 1652
1651 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); 1653 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1652 dirty = 1; 1654 dirty = 1;
@@ -1656,18 +1658,24 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1656 num_bytes, parent, 1658 num_bytes, parent,
1657 btrfs_header_owner(leaf), 1659 btrfs_header_owner(leaf),
1658 key.objectid, key.offset, 1); 1660 key.objectid, key.offset, 1);
1659 BUG_ON(ret); 1661 if (ret) {
1662 btrfs_abort_transaction(trans, root, ret);
1663 break;
1664 }
1660 1665
1661 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1666 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1662 parent, btrfs_header_owner(leaf), 1667 parent, btrfs_header_owner(leaf),
1663 key.objectid, key.offset, 1); 1668 key.objectid, key.offset, 1);
1664 BUG_ON(ret); 1669 if (ret) {
1670 btrfs_abort_transaction(trans, root, ret);
1671 break;
1672 }
1665 } 1673 }
1666 if (dirty) 1674 if (dirty)
1667 btrfs_mark_buffer_dirty(leaf); 1675 btrfs_mark_buffer_dirty(leaf);
1668 if (inode) 1676 if (inode)
1669 btrfs_add_delayed_iput(inode); 1677 btrfs_add_delayed_iput(inode);
1670 return 0; 1678 return ret;
1671} 1679}
1672 1680
1673static noinline_for_stack 1681static noinline_for_stack
@@ -4238,7 +4246,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4238 err = ret; 4246 err = ret;
4239 goto out; 4247 goto out;
4240 } 4248 }
4241 btrfs_wait_all_ordered_extents(fs_info, 0); 4249 btrfs_wait_all_ordered_extents(fs_info);
4242 4250
4243 while (1) { 4251 while (1) {
4244 mutex_lock(&fs_info->cleaner_mutex); 4252 mutex_lock(&fs_info->cleaner_mutex);
@@ -4499,19 +4507,19 @@ out:
4499 return ret; 4507 return ret;
4500} 4508}
4501 4509
4502void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, 4510int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
4503 struct btrfs_root *root, struct extent_buffer *buf, 4511 struct btrfs_root *root, struct extent_buffer *buf,
4504 struct extent_buffer *cow) 4512 struct extent_buffer *cow)
4505{ 4513{
4506 struct reloc_control *rc; 4514 struct reloc_control *rc;
4507 struct backref_node *node; 4515 struct backref_node *node;
4508 int first_cow = 0; 4516 int first_cow = 0;
4509 int level; 4517 int level;
4510 int ret; 4518 int ret = 0;
4511 4519
4512 rc = root->fs_info->reloc_ctl; 4520 rc = root->fs_info->reloc_ctl;
4513 if (!rc) 4521 if (!rc)
4514 return; 4522 return 0;
4515 4523
4516 BUG_ON(rc->stage == UPDATE_DATA_PTRS && 4524 BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
4517 root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); 4525 root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
@@ -4547,10 +4555,9 @@ void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
4547 rc->nodes_relocated += buf->len; 4555 rc->nodes_relocated += buf->len;
4548 } 4556 }
4549 4557
4550 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) { 4558 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)
4551 ret = replace_file_extents(trans, rc, root, cow); 4559 ret = replace_file_extents(trans, rc, root, cow);
4552 BUG_ON(ret); 4560 return ret;
4553 }
4554} 4561}
4555 4562
4556/* 4563/*
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 0afcd452fcb3..a18e0e23f6a6 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -158,12 +158,20 @@ struct scrub_fixup_nodatasum {
158 int mirror_num; 158 int mirror_num;
159}; 159};
160 160
161struct scrub_nocow_inode {
162 u64 inum;
163 u64 offset;
164 u64 root;
165 struct list_head list;
166};
167
161struct scrub_copy_nocow_ctx { 168struct scrub_copy_nocow_ctx {
162 struct scrub_ctx *sctx; 169 struct scrub_ctx *sctx;
163 u64 logical; 170 u64 logical;
164 u64 len; 171 u64 len;
165 int mirror_num; 172 int mirror_num;
166 u64 physical_for_dev_replace; 173 u64 physical_for_dev_replace;
174 struct list_head inodes;
167 struct btrfs_work work; 175 struct btrfs_work work;
168}; 176};
169 177
@@ -245,7 +253,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
245static int write_page_nocow(struct scrub_ctx *sctx, 253static int write_page_nocow(struct scrub_ctx *sctx,
246 u64 physical_for_dev_replace, struct page *page); 254 u64 physical_for_dev_replace, struct page *page);
247static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, 255static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
248 void *ctx); 256 struct scrub_copy_nocow_ctx *ctx);
249static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, 257static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
250 int mirror_num, u64 physical_for_dev_replace); 258 int mirror_num, u64 physical_for_dev_replace);
251static void copy_nocow_pages_worker(struct btrfs_work *work); 259static void copy_nocow_pages_worker(struct btrfs_work *work);
@@ -3126,12 +3134,30 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
3126 nocow_ctx->mirror_num = mirror_num; 3134 nocow_ctx->mirror_num = mirror_num;
3127 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; 3135 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
3128 nocow_ctx->work.func = copy_nocow_pages_worker; 3136 nocow_ctx->work.func = copy_nocow_pages_worker;
3137 INIT_LIST_HEAD(&nocow_ctx->inodes);
3129 btrfs_queue_worker(&fs_info->scrub_nocow_workers, 3138 btrfs_queue_worker(&fs_info->scrub_nocow_workers,
3130 &nocow_ctx->work); 3139 &nocow_ctx->work);
3131 3140
3132 return 0; 3141 return 0;
3133} 3142}
3134 3143
3144static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
3145{
3146 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
3147 struct scrub_nocow_inode *nocow_inode;
3148
3149 nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
3150 if (!nocow_inode)
3151 return -ENOMEM;
3152 nocow_inode->inum = inum;
3153 nocow_inode->offset = offset;
3154 nocow_inode->root = root;
3155 list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
3156 return 0;
3157}
3158
3159#define COPY_COMPLETE 1
3160
3135static void copy_nocow_pages_worker(struct btrfs_work *work) 3161static void copy_nocow_pages_worker(struct btrfs_work *work)
3136{ 3162{
3137 struct scrub_copy_nocow_ctx *nocow_ctx = 3163 struct scrub_copy_nocow_ctx *nocow_ctx =
@@ -3167,8 +3193,7 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
3167 } 3193 }
3168 3194
3169 ret = iterate_inodes_from_logical(logical, fs_info, path, 3195 ret = iterate_inodes_from_logical(logical, fs_info, path,
3170 copy_nocow_pages_for_inode, 3196 record_inode_for_nocow, nocow_ctx);
3171 nocow_ctx);
3172 if (ret != 0 && ret != -ENOENT) { 3197 if (ret != 0 && ret != -ENOENT) {
3173 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n", 3198 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n",
3174 logical, physical_for_dev_replace, len, mirror_num, 3199 logical, physical_for_dev_replace, len, mirror_num,
@@ -3177,7 +3202,33 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
3177 goto out; 3202 goto out;
3178 } 3203 }
3179 3204
3205 btrfs_end_transaction(trans, root);
3206 trans = NULL;
3207 while (!list_empty(&nocow_ctx->inodes)) {
3208 struct scrub_nocow_inode *entry;
3209 entry = list_first_entry(&nocow_ctx->inodes,
3210 struct scrub_nocow_inode,
3211 list);
3212 list_del_init(&entry->list);
3213 ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
3214 entry->root, nocow_ctx);
3215 kfree(entry);
3216 if (ret == COPY_COMPLETE) {
3217 ret = 0;
3218 break;
3219 } else if (ret) {
3220 break;
3221 }
3222 }
3180out: 3223out:
3224 while (!list_empty(&nocow_ctx->inodes)) {
3225 struct scrub_nocow_inode *entry;
3226 entry = list_first_entry(&nocow_ctx->inodes,
3227 struct scrub_nocow_inode,
3228 list);
3229 list_del_init(&entry->list);
3230 kfree(entry);
3231 }
3181 if (trans && !IS_ERR(trans)) 3232 if (trans && !IS_ERR(trans))
3182 btrfs_end_transaction(trans, root); 3233 btrfs_end_transaction(trans, root);
3183 if (not_written) 3234 if (not_written)
@@ -3190,20 +3241,25 @@ out:
3190 scrub_pending_trans_workers_dec(sctx); 3241 scrub_pending_trans_workers_dec(sctx);
3191} 3242}
3192 3243
3193static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) 3244static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
3245 struct scrub_copy_nocow_ctx *nocow_ctx)
3194{ 3246{
3195 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
3196 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; 3247 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
3197 struct btrfs_key key; 3248 struct btrfs_key key;
3198 struct inode *inode; 3249 struct inode *inode;
3199 struct page *page; 3250 struct page *page;
3200 struct btrfs_root *local_root; 3251 struct btrfs_root *local_root;
3252 struct btrfs_ordered_extent *ordered;
3253 struct extent_map *em;
3254 struct extent_state *cached_state = NULL;
3255 struct extent_io_tree *io_tree;
3201 u64 physical_for_dev_replace; 3256 u64 physical_for_dev_replace;
3202 u64 len; 3257 u64 len = nocow_ctx->len;
3258 u64 lockstart = offset, lockend = offset + len - 1;
3203 unsigned long index; 3259 unsigned long index;
3204 int srcu_index; 3260 int srcu_index;
3205 int ret; 3261 int ret = 0;
3206 int err; 3262 int err = 0;
3207 3263
3208 key.objectid = root; 3264 key.objectid = root;
3209 key.type = BTRFS_ROOT_ITEM_KEY; 3265 key.type = BTRFS_ROOT_ITEM_KEY;
@@ -3229,9 +3285,33 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
3229 mutex_lock(&inode->i_mutex); 3285 mutex_lock(&inode->i_mutex);
3230 inode_dio_wait(inode); 3286 inode_dio_wait(inode);
3231 3287
3232 ret = 0;
3233 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; 3288 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3234 len = nocow_ctx->len; 3289 io_tree = &BTRFS_I(inode)->io_tree;
3290
3291 lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
3292 ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
3293 if (ordered) {
3294 btrfs_put_ordered_extent(ordered);
3295 goto out_unlock;
3296 }
3297
3298 em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
3299 if (IS_ERR(em)) {
3300 ret = PTR_ERR(em);
3301 goto out_unlock;
3302 }
3303
3304 /*
3305 * This extent does not actually cover the logical extent anymore,
3306 * move on to the next inode.
3307 */
3308 if (em->block_start > nocow_ctx->logical ||
3309 em->block_start + em->block_len < nocow_ctx->logical + len) {
3310 free_extent_map(em);
3311 goto out_unlock;
3312 }
3313 free_extent_map(em);
3314
3235 while (len >= PAGE_CACHE_SIZE) { 3315 while (len >= PAGE_CACHE_SIZE) {
3236 index = offset >> PAGE_CACHE_SHIFT; 3316 index = offset >> PAGE_CACHE_SHIFT;
3237again: 3317again:
@@ -3247,10 +3327,9 @@ again:
3247 goto next_page; 3327 goto next_page;
3248 } else { 3328 } else {
3249 ClearPageError(page); 3329 ClearPageError(page);
3250 err = extent_read_full_page(&BTRFS_I(inode)-> 3330 err = extent_read_full_page_nolock(io_tree, page,
3251 io_tree, 3331 btrfs_get_extent,
3252 page, btrfs_get_extent, 3332 nocow_ctx->mirror_num);
3253 nocow_ctx->mirror_num);
3254 if (err) { 3333 if (err) {
3255 ret = err; 3334 ret = err;
3256 goto next_page; 3335 goto next_page;
@@ -3264,6 +3343,7 @@ again:
3264 * page in the page cache. 3343 * page in the page cache.
3265 */ 3344 */
3266 if (page->mapping != inode->i_mapping) { 3345 if (page->mapping != inode->i_mapping) {
3346 unlock_page(page);
3267 page_cache_release(page); 3347 page_cache_release(page);
3268 goto again; 3348 goto again;
3269 } 3349 }
@@ -3287,6 +3367,10 @@ next_page:
3287 physical_for_dev_replace += PAGE_CACHE_SIZE; 3367 physical_for_dev_replace += PAGE_CACHE_SIZE;
3288 len -= PAGE_CACHE_SIZE; 3368 len -= PAGE_CACHE_SIZE;
3289 } 3369 }
3370 ret = COPY_COMPLETE;
3371out_unlock:
3372 unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
3373 GFP_NOFS);
3290out: 3374out:
3291 mutex_unlock(&inode->i_mutex); 3375 mutex_unlock(&inode->i_mutex);
3292 iput(inode); 3376 iput(inode);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3aab10ce63e8..e913328d0f2a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -921,7 +921,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
921 return 0; 921 return 0;
922 } 922 }
923 923
924 btrfs_wait_all_ordered_extents(fs_info, 1); 924 btrfs_wait_all_ordered_extents(fs_info);
925 925
926 trans = btrfs_attach_transaction_barrier(root); 926 trans = btrfs_attach_transaction_barrier(root);
927 if (IS_ERR(trans)) { 927 if (IS_ERR(trans)) {
@@ -1340,6 +1340,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1340 if (ret) 1340 if (ret)
1341 goto restore; 1341 goto restore;
1342 } else { 1342 } else {
1343 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
1344 btrfs_err(fs_info,
1345 "Remounting read-write after error is not allowed\n");
1346 ret = -EINVAL;
1347 goto restore;
1348 }
1343 if (fs_info->fs_devices->rw_devices == 0) { 1349 if (fs_info->fs_devices->rw_devices == 0) {
1344 ret = -EACCES; 1350 ret = -EACCES;
1345 goto restore; 1351 goto restore;
@@ -1377,6 +1383,16 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1377 pr_warn("btrfs: failed to resume dev_replace\n"); 1383 pr_warn("btrfs: failed to resume dev_replace\n");
1378 goto restore; 1384 goto restore;
1379 } 1385 }
1386
1387 if (!fs_info->uuid_root) {
1388 pr_info("btrfs: creating UUID tree\n");
1389 ret = btrfs_create_uuid_tree(fs_info);
1390 if (ret) {
1391 pr_warn("btrfs: failed to create the uuid tree"
1392 "%d\n", ret);
1393 goto restore;
1394 }
1395 }
1380 sb->s_flags &= ~MS_RDONLY; 1396 sb->s_flags &= ~MS_RDONLY;
1381 } 1397 }
1382out: 1398out:
@@ -1762,6 +1778,9 @@ static void btrfs_print_info(void)
1762#ifdef CONFIG_BTRFS_DEBUG 1778#ifdef CONFIG_BTRFS_DEBUG
1763 ", debug=on" 1779 ", debug=on"
1764#endif 1780#endif
1781#ifdef CONFIG_BTRFS_ASSERT
1782 ", assert=on"
1783#endif
1765#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 1784#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1766 ", integrity-checker=on" 1785 ", integrity-checker=on"
1767#endif 1786#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index cac4a3f76323..8c81bdc1ef9b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1603,7 +1603,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
1603static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) 1603static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
1604{ 1604{
1605 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) 1605 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
1606 btrfs_wait_all_ordered_extents(fs_info, 1); 1606 btrfs_wait_all_ordered_extents(fs_info);
1607} 1607}
1608 1608
1609int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 1609int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -1838,11 +1838,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1838 assert_qgroups_uptodate(trans); 1838 assert_qgroups_uptodate(trans);
1839 update_super_roots(root); 1839 update_super_roots(root);
1840 1840
1841 if (!root->fs_info->log_root_recovering) { 1841 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
1842 btrfs_set_super_log_root(root->fs_info->super_copy, 0); 1842 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
1843 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
1844 }
1845
1846 memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, 1843 memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy,
1847 sizeof(*root->fs_info->super_copy)); 1844 sizeof(*root->fs_info->super_copy));
1848 1845
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 0d9613c3f5e5..79f057c0619a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -93,7 +93,8 @@
93 */ 93 */
94#define LOG_WALK_PIN_ONLY 0 94#define LOG_WALK_PIN_ONLY 0
95#define LOG_WALK_REPLAY_INODES 1 95#define LOG_WALK_REPLAY_INODES 1
96#define LOG_WALK_REPLAY_ALL 2 96#define LOG_WALK_REPLAY_DIR_INDEX 2
97#define LOG_WALK_REPLAY_ALL 3
97 98
98static int btrfs_log_inode(struct btrfs_trans_handle *trans, 99static int btrfs_log_inode(struct btrfs_trans_handle *trans,
99 struct btrfs_root *root, struct inode *inode, 100 struct btrfs_root *root, struct inode *inode,
@@ -393,6 +394,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
393 if (inode_item) { 394 if (inode_item) {
394 struct btrfs_inode_item *item; 395 struct btrfs_inode_item *item;
395 u64 nbytes; 396 u64 nbytes;
397 u32 mode;
396 398
397 item = btrfs_item_ptr(path->nodes[0], path->slots[0], 399 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
398 struct btrfs_inode_item); 400 struct btrfs_inode_item);
@@ -400,9 +402,19 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
400 item = btrfs_item_ptr(eb, slot, 402 item = btrfs_item_ptr(eb, slot,
401 struct btrfs_inode_item); 403 struct btrfs_inode_item);
402 btrfs_set_inode_nbytes(eb, item, nbytes); 404 btrfs_set_inode_nbytes(eb, item, nbytes);
405
406 /*
407 * If this is a directory we need to reset the i_size to
408 * 0 so that we can set it up properly when replaying
409 * the rest of the items in this log.
410 */
411 mode = btrfs_inode_mode(eb, item);
412 if (S_ISDIR(mode))
413 btrfs_set_inode_size(eb, item, 0);
403 } 414 }
404 } else if (inode_item) { 415 } else if (inode_item) {
405 struct btrfs_inode_item *item; 416 struct btrfs_inode_item *item;
417 u32 mode;
406 418
407 /* 419 /*
408 * New inode, set nbytes to 0 so that the nbytes comes out 420 * New inode, set nbytes to 0 so that the nbytes comes out
@@ -410,6 +422,15 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
410 */ 422 */
411 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); 423 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
412 btrfs_set_inode_nbytes(eb, item, 0); 424 btrfs_set_inode_nbytes(eb, item, 0);
425
426 /*
427 * If this is a directory we need to reset the i_size to 0 so
428 * that we can set it up properly when replaying the rest of
429 * the items in this log.
430 */
431 mode = btrfs_inode_mode(eb, item);
432 if (S_ISDIR(mode))
433 btrfs_set_inode_size(eb, item, 0);
413 } 434 }
414insert: 435insert:
415 btrfs_release_path(path); 436 btrfs_release_path(path);
@@ -1496,6 +1517,7 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
1496 iput(inode); 1517 iput(inode);
1497 return -EIO; 1518 return -EIO;
1498 } 1519 }
1520
1499 ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index); 1521 ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index);
1500 1522
1501 /* FIXME, put inode into FIXUP list */ 1523 /* FIXME, put inode into FIXUP list */
@@ -1534,6 +1556,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1534 u8 log_type; 1556 u8 log_type;
1535 int exists; 1557 int exists;
1536 int ret = 0; 1558 int ret = 0;
1559 bool update_size = (key->type == BTRFS_DIR_INDEX_KEY);
1537 1560
1538 dir = read_one_inode(root, key->objectid); 1561 dir = read_one_inode(root, key->objectid);
1539 if (!dir) 1562 if (!dir)
@@ -1604,6 +1627,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1604 goto insert; 1627 goto insert;
1605out: 1628out:
1606 btrfs_release_path(path); 1629 btrfs_release_path(path);
1630 if (!ret && update_size) {
1631 btrfs_i_size_write(dir, dir->i_size + name_len * 2);
1632 ret = btrfs_update_inode(trans, root, dir);
1633 }
1607 kfree(name); 1634 kfree(name);
1608 iput(dir); 1635 iput(dir);
1609 return ret; 1636 return ret;
@@ -1614,6 +1641,7 @@ insert:
1614 name, name_len, log_type, &log_key); 1641 name, name_len, log_type, &log_key);
1615 if (ret && ret != -ENOENT) 1642 if (ret && ret != -ENOENT)
1616 goto out; 1643 goto out;
1644 update_size = false;
1617 ret = 0; 1645 ret = 0;
1618 goto out; 1646 goto out;
1619} 1647}
@@ -2027,6 +2055,15 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
2027 if (ret) 2055 if (ret)
2028 break; 2056 break;
2029 } 2057 }
2058
2059 if (key.type == BTRFS_DIR_INDEX_KEY &&
2060 wc->stage == LOG_WALK_REPLAY_DIR_INDEX) {
2061 ret = replay_one_dir_item(wc->trans, root, path,
2062 eb, i, &key);
2063 if (ret)
2064 break;
2065 }
2066
2030 if (wc->stage < LOG_WALK_REPLAY_ALL) 2067 if (wc->stage < LOG_WALK_REPLAY_ALL)
2031 continue; 2068 continue;
2032 2069
@@ -2048,8 +2085,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
2048 eb, i, &key); 2085 eb, i, &key);
2049 if (ret) 2086 if (ret)
2050 break; 2087 break;
2051 } else if (key.type == BTRFS_DIR_ITEM_KEY || 2088 } else if (key.type == BTRFS_DIR_ITEM_KEY) {
2052 key.type == BTRFS_DIR_INDEX_KEY) {
2053 ret = replay_one_dir_item(wc->trans, root, path, 2089 ret = replay_one_dir_item(wc->trans, root, path,
2054 eb, i, &key); 2090 eb, i, &key);
2055 if (ret) 2091 if (ret)
@@ -3805,6 +3841,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
3805 int ret = 0; 3841 int ret = 0;
3806 struct btrfs_root *root; 3842 struct btrfs_root *root;
3807 struct dentry *old_parent = NULL; 3843 struct dentry *old_parent = NULL;
3844 struct inode *orig_inode = inode;
3808 3845
3809 /* 3846 /*
3810 * for regular files, if its inode is already on disk, we don't 3847 * for regular files, if its inode is already on disk, we don't
@@ -3824,7 +3861,14 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
3824 } 3861 }
3825 3862
3826 while (1) { 3863 while (1) {
3827 BTRFS_I(inode)->logged_trans = trans->transid; 3864 /*
3865 * If we are logging a directory then we start with our inode,
3866 * not our parents inode, so we need to skipp setting the
3867 * logged_trans so that further down in the log code we don't
3868 * think this inode has already been logged.
3869 */
3870 if (inode != orig_inode)
3871 BTRFS_I(inode)->logged_trans = trans->transid;
3828 smp_mb(); 3872 smp_mb();
3829 3873
3830 if (BTRFS_I(inode)->last_unlink_trans > last_committed) { 3874 if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0052ca8264d9..043b215769c2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -796,7 +796,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
796 fs_devices->rotating = 1; 796 fs_devices->rotating = 1;
797 797
798 fs_devices->open_devices++; 798 fs_devices->open_devices++;
799 if (device->writeable && !device->is_tgtdev_for_dev_replace) { 799 if (device->writeable &&
800 device->devid != BTRFS_DEV_REPLACE_DEVID) {
800 fs_devices->rw_devices++; 801 fs_devices->rw_devices++;
801 list_add(&device->dev_alloc_list, 802 list_add(&device->dev_alloc_list,
802 &fs_devices->alloc_list); 803 &fs_devices->alloc_list);
@@ -911,9 +912,9 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
911 if (disk_super->label[0]) { 912 if (disk_super->label[0]) {
912 if (disk_super->label[BTRFS_LABEL_SIZE - 1]) 913 if (disk_super->label[BTRFS_LABEL_SIZE - 1])
913 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; 914 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
914 printk(KERN_INFO "device label %s ", disk_super->label); 915 printk(KERN_INFO "btrfs: device label %s ", disk_super->label);
915 } else { 916 } else {
916 printk(KERN_INFO "device fsid %pU ", disk_super->fsid); 917 printk(KERN_INFO "btrfs: device fsid %pU ", disk_super->fsid);
917 } 918 }
918 919
919 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); 920 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
@@ -1715,6 +1716,7 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
1715 struct btrfs_device *srcdev) 1716 struct btrfs_device *srcdev)
1716{ 1717{
1717 WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex)); 1718 WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex));
1719
1718 list_del_rcu(&srcdev->dev_list); 1720 list_del_rcu(&srcdev->dev_list);
1719 list_del_rcu(&srcdev->dev_alloc_list); 1721 list_del_rcu(&srcdev->dev_alloc_list);
1720 fs_info->fs_devices->num_devices--; 1722 fs_info->fs_devices->num_devices--;
@@ -1724,9 +1726,13 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
1724 } 1726 }
1725 if (srcdev->can_discard) 1727 if (srcdev->can_discard)
1726 fs_info->fs_devices->num_can_discard--; 1728 fs_info->fs_devices->num_can_discard--;
1727 if (srcdev->bdev) 1729 if (srcdev->bdev) {
1728 fs_info->fs_devices->open_devices--; 1730 fs_info->fs_devices->open_devices--;
1729 1731
1732 /* zero out the old super */
1733 btrfs_scratch_superblock(srcdev);
1734 }
1735
1730 call_rcu(&srcdev->rcu, free_device); 1736 call_rcu(&srcdev->rcu, free_device);
1731} 1737}
1732 1738
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 25badd1aec5c..f4a08d7fa2f7 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -56,7 +56,7 @@ void __cachefiles_printk_object(struct cachefiles_object *object,
56 object->fscache.cookie->parent, 56 object->fscache.cookie->parent,
57 object->fscache.cookie->netfs_data, 57 object->fscache.cookie->netfs_data,
58 object->fscache.cookie->flags); 58 object->fscache.cookie->flags);
59 if (keybuf) 59 if (keybuf && cookie->def)
60 keylen = cookie->def->get_key(cookie->netfs_data, keybuf, 60 keylen = cookie->def->get_key(cookie->netfs_data, keybuf,
61 CACHEFILES_KEYBUF_SIZE); 61 CACHEFILES_KEYBUF_SIZE);
62 else 62 else
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index 34c88b83e39f..12b0eef84183 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -162,8 +162,9 @@ int cachefiles_update_object_xattr(struct cachefiles_object *object,
162int cachefiles_check_auxdata(struct cachefiles_object *object) 162int cachefiles_check_auxdata(struct cachefiles_object *object)
163{ 163{
164 struct cachefiles_xattr *auxbuf; 164 struct cachefiles_xattr *auxbuf;
165 enum fscache_checkaux validity;
165 struct dentry *dentry = object->dentry; 166 struct dentry *dentry = object->dentry;
166 unsigned int dlen; 167 ssize_t xlen;
167 int ret; 168 int ret;
168 169
169 ASSERT(dentry); 170 ASSERT(dentry);
@@ -174,22 +175,22 @@ int cachefiles_check_auxdata(struct cachefiles_object *object)
174 if (!auxbuf) 175 if (!auxbuf)
175 return -ENOMEM; 176 return -ENOMEM;
176 177
177 auxbuf->len = vfs_getxattr(dentry, cachefiles_xattr_cache, 178 xlen = vfs_getxattr(dentry, cachefiles_xattr_cache,
178 &auxbuf->type, 512 + 1); 179 &auxbuf->type, 512 + 1);
179 if (auxbuf->len < 1) 180 ret = -ESTALE;
180 return -ESTALE; 181 if (xlen < 1 ||
181 182 auxbuf->type != object->fscache.cookie->def->type)
182 if (auxbuf->type != object->fscache.cookie->def->type) 183 goto error;
183 return -ESTALE;
184 184
185 dlen = auxbuf->len - 1; 185 xlen--;
186 ret = fscache_check_aux(&object->fscache, &auxbuf->data, dlen); 186 validity = fscache_check_aux(&object->fscache, &auxbuf->data, xlen);
187 if (validity != FSCACHE_CHECKAUX_OKAY)
188 goto error;
187 189
190 ret = 0;
191error:
188 kfree(auxbuf); 192 kfree(auxbuf);
189 if (ret != FSCACHE_CHECKAUX_OKAY) 193 return ret;
190 return -ESTALE;
191
192 return 0;
193} 194}
194 195
195/* 196/*
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index ea723a5e8226..6d0b07217ac9 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -132,5 +132,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
132extern const struct export_operations cifs_export_ops; 132extern const struct export_operations cifs_export_ops;
133#endif /* CONFIG_CIFS_NFSD_EXPORT */ 133#endif /* CONFIG_CIFS_NFSD_EXPORT */
134 134
135#define CIFS_VERSION "2.01" 135#define CIFS_VERSION "2.02"
136#endif /* _CIFSFS_H */ 136#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index cfa14c80ef3b..52b6f6c26bfc 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -547,9 +547,6 @@ struct TCP_Server_Info {
547 unsigned int max_rw; /* maxRw specifies the maximum */ 547 unsigned int max_rw; /* maxRw specifies the maximum */
548 /* message size the server can send or receive for */ 548 /* message size the server can send or receive for */
549 /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ 549 /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */
550 unsigned int max_vcs; /* maximum number of smb sessions, at least
551 those that can be specified uniquely with
552 vcnumbers */
553 unsigned int capabilities; /* selective disabling of caps by smb sess */ 550 unsigned int capabilities; /* selective disabling of caps by smb sess */
554 int timeAdj; /* Adjust for difference in server time zone in sec */ 551 int timeAdj; /* Adjust for difference in server time zone in sec */
555 __u64 CurrentMid; /* multiplex id - rotating counter */ 552 __u64 CurrentMid; /* multiplex id - rotating counter */
@@ -715,7 +712,6 @@ struct cifs_ses {
715 enum statusEnum status; 712 enum statusEnum status;
716 unsigned overrideSecFlg; /* if non-zero override global sec flags */ 713 unsigned overrideSecFlg; /* if non-zero override global sec flags */
717 __u16 ipc_tid; /* special tid for connection to IPC share */ 714 __u16 ipc_tid; /* special tid for connection to IPC share */
718 __u16 vcnum;
719 char *serverOS; /* name of operating system underlying server */ 715 char *serverOS; /* name of operating system underlying server */
720 char *serverNOS; /* name of network operating system of server */ 716 char *serverNOS; /* name of network operating system of server */
721 char *serverDomain; /* security realm of server */ 717 char *serverDomain; /* security realm of server */
@@ -1272,6 +1268,7 @@ struct dfs_info3_param {
1272#define CIFS_FATTR_DELETE_PENDING 0x2 1268#define CIFS_FATTR_DELETE_PENDING 0x2
1273#define CIFS_FATTR_NEED_REVAL 0x4 1269#define CIFS_FATTR_NEED_REVAL 0x4
1274#define CIFS_FATTR_INO_COLLISION 0x8 1270#define CIFS_FATTR_INO_COLLISION 0x8
1271#define CIFS_FATTR_UNKNOWN_NLINK 0x10
1275 1272
1276struct cifs_fattr { 1273struct cifs_fattr {
1277 u32 cf_flags; 1274 u32 cf_flags;
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 948676db8e2e..a630475e421c 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2652,26 +2652,7 @@ typedef struct file_xattr_info {
2652} __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute info 2652} __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute info
2653 level 0x205 */ 2653 level 0x205 */
2654 2654
2655 2655/* flags for lsattr and chflags commands removed arein uapi/linux/fs.h */
2656/* flags for chattr command */
2657#define EXT_SECURE_DELETE 0x00000001 /* EXT3_SECRM_FL */
2658#define EXT_ENABLE_UNDELETE 0x00000002 /* EXT3_UNRM_FL */
2659/* Reserved for compress file 0x4 */
2660#define EXT_SYNCHRONOUS 0x00000008 /* EXT3_SYNC_FL */
2661#define EXT_IMMUTABLE_FL 0x00000010 /* EXT3_IMMUTABLE_FL */
2662#define EXT_OPEN_APPEND_ONLY 0x00000020 /* EXT3_APPEND_FL */
2663#define EXT_DO_NOT_BACKUP 0x00000040 /* EXT3_NODUMP_FL */
2664#define EXT_NO_UPDATE_ATIME 0x00000080 /* EXT3_NOATIME_FL */
2665/* 0x100 through 0x800 reserved for compression flags and are GET-ONLY */
2666#define EXT_HASH_TREE_INDEXED_DIR 0x00001000 /* GET-ONLY EXT3_INDEX_FL */
2667/* 0x2000 reserved for IMAGIC_FL */
2668#define EXT_JOURNAL_THIS_FILE 0x00004000 /* GET-ONLY EXT3_JOURNAL_DATA_FL */
2669/* 0x8000 reserved for EXT3_NOTAIL_FL */
2670#define EXT_SYNCHRONOUS_DIR 0x00010000 /* EXT3_DIRSYNC_FL */
2671#define EXT_TOPDIR 0x00020000 /* EXT3_TOPDIR_FL */
2672
2673#define EXT_SET_MASK 0x000300FF
2674#define EXT_GET_MASK 0x0003DFFF
2675 2656
2676typedef struct file_chattr_info { 2657typedef struct file_chattr_info {
2677 __le64 mask; /* list of all possible attribute bits */ 2658 __le64 mask; /* list of all possible attribute bits */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index a3d74fea1623..4baf35949b51 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -463,7 +463,6 @@ decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr)
463 cifs_max_pending); 463 cifs_max_pending);
464 set_credits(server, server->maxReq); 464 set_credits(server, server->maxReq);
465 server->maxBuf = le16_to_cpu(rsp->MaxBufSize); 465 server->maxBuf = le16_to_cpu(rsp->MaxBufSize);
466 server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs);
467 /* even though we do not use raw we might as well set this 466 /* even though we do not use raw we might as well set this
468 accurately, in case we ever find a need for it */ 467 accurately, in case we ever find a need for it */
469 if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { 468 if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index d3e2eaa503a6..5384c2a640ca 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -500,6 +500,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
500 if (server->ops->close) 500 if (server->ops->close)
501 server->ops->close(xid, tcon, &fid); 501 server->ops->close(xid, tcon, &fid);
502 cifs_del_pending_open(&open); 502 cifs_del_pending_open(&open);
503 fput(file);
503 rc = -ENOMEM; 504 rc = -ENOMEM;
504 } 505 }
505 506
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index eb955b525e55..7ddddf2e2504 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3254,6 +3254,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3254 /* 3254 /*
3255 * Reads as many pages as possible from fscache. Returns -ENOBUFS 3255 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3256 * immediately if the cookie is negative 3256 * immediately if the cookie is negative
3257 *
3258 * After this point, every page in the list might have PG_fscache set,
3259 * so we will need to clean that up off of every page we don't use.
3257 */ 3260 */
3258 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list, 3261 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3259 &num_pages); 3262 &num_pages);
@@ -3376,6 +3379,11 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3376 kref_put(&rdata->refcount, cifs_readdata_release); 3379 kref_put(&rdata->refcount, cifs_readdata_release);
3377 } 3380 }
3378 3381
3382 /* Any pages that have been shown to fscache but didn't get added to
3383 * the pagecache must be uncached before they get returned to the
3384 * allocator.
3385 */
3386 cifs_fscache_readpages_cancel(mapping->host, page_list);
3379 return rc; 3387 return rc;
3380} 3388}
3381 3389
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 2f4bc5a58054..b3258f35e88a 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -223,6 +223,13 @@ void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
223 fscache_uncache_page(CIFS_I(inode)->fscache, page); 223 fscache_uncache_page(CIFS_I(inode)->fscache, page);
224} 224}
225 225
226void __cifs_fscache_readpages_cancel(struct inode *inode, struct list_head *pages)
227{
228 cifs_dbg(FYI, "%s: (fsc: %p, i: %p)\n",
229 __func__, CIFS_I(inode)->fscache, inode);
230 fscache_readpages_cancel(CIFS_I(inode)->fscache, pages);
231}
232
226void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode) 233void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
227{ 234{
228 struct cifsInodeInfo *cifsi = CIFS_I(inode); 235 struct cifsInodeInfo *cifsi = CIFS_I(inode);
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index 63539323e0b9..24794b6cd8ec 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -54,6 +54,7 @@ extern int __cifs_readpages_from_fscache(struct inode *,
54 struct address_space *, 54 struct address_space *,
55 struct list_head *, 55 struct list_head *,
56 unsigned *); 56 unsigned *);
57extern void __cifs_fscache_readpages_cancel(struct inode *, struct list_head *);
57 58
58extern void __cifs_readpage_to_fscache(struct inode *, struct page *); 59extern void __cifs_readpage_to_fscache(struct inode *, struct page *);
59 60
@@ -91,6 +92,13 @@ static inline void cifs_readpage_to_fscache(struct inode *inode,
91 __cifs_readpage_to_fscache(inode, page); 92 __cifs_readpage_to_fscache(inode, page);
92} 93}
93 94
95static inline void cifs_fscache_readpages_cancel(struct inode *inode,
96 struct list_head *pages)
97{
98 if (CIFS_I(inode)->fscache)
99 return __cifs_fscache_readpages_cancel(inode, pages);
100}
101
94#else /* CONFIG_CIFS_FSCACHE */ 102#else /* CONFIG_CIFS_FSCACHE */
95static inline int cifs_fscache_register(void) { return 0; } 103static inline int cifs_fscache_register(void) { return 0; }
96static inline void cifs_fscache_unregister(void) {} 104static inline void cifs_fscache_unregister(void) {}
@@ -131,6 +139,11 @@ static inline int cifs_readpages_from_fscache(struct inode *inode,
131static inline void cifs_readpage_to_fscache(struct inode *inode, 139static inline void cifs_readpage_to_fscache(struct inode *inode,
132 struct page *page) {} 140 struct page *page) {}
133 141
142static inline void cifs_fscache_readpages_cancel(struct inode *inode,
143 struct list_head *pages)
144{
145}
146
134#endif /* CONFIG_CIFS_FSCACHE */ 147#endif /* CONFIG_CIFS_FSCACHE */
135 148
136#endif /* _CIFS_FSCACHE_H */ 149#endif /* _CIFS_FSCACHE_H */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f9ff9c173f78..867b7cdc794a 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -120,6 +120,33 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
120 cifs_i->invalid_mapping = true; 120 cifs_i->invalid_mapping = true;
121} 121}
122 122
123/*
124 * copy nlink to the inode, unless it wasn't provided. Provide
125 * sane values if we don't have an existing one and none was provided
126 */
127static void
128cifs_nlink_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
129{
130 /*
131 * if we're in a situation where we can't trust what we
132 * got from the server (readdir, some non-unix cases)
133 * fake reasonable values
134 */
135 if (fattr->cf_flags & CIFS_FATTR_UNKNOWN_NLINK) {
136 /* only provide fake values on a new inode */
137 if (inode->i_state & I_NEW) {
138 if (fattr->cf_cifsattrs & ATTR_DIRECTORY)
139 set_nlink(inode, 2);
140 else
141 set_nlink(inode, 1);
142 }
143 return;
144 }
145
146 /* we trust the server, so update it */
147 set_nlink(inode, fattr->cf_nlink);
148}
149
123/* populate an inode with info from a cifs_fattr struct */ 150/* populate an inode with info from a cifs_fattr struct */
124void 151void
125cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) 152cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
@@ -134,7 +161,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
134 inode->i_mtime = fattr->cf_mtime; 161 inode->i_mtime = fattr->cf_mtime;
135 inode->i_ctime = fattr->cf_ctime; 162 inode->i_ctime = fattr->cf_ctime;
136 inode->i_rdev = fattr->cf_rdev; 163 inode->i_rdev = fattr->cf_rdev;
137 set_nlink(inode, fattr->cf_nlink); 164 cifs_nlink_fattr_to_inode(inode, fattr);
138 inode->i_uid = fattr->cf_uid; 165 inode->i_uid = fattr->cf_uid;
139 inode->i_gid = fattr->cf_gid; 166 inode->i_gid = fattr->cf_gid;
140 167
@@ -541,6 +568,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
541 fattr->cf_bytes = le64_to_cpu(info->AllocationSize); 568 fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
542 fattr->cf_createtime = le64_to_cpu(info->CreationTime); 569 fattr->cf_createtime = le64_to_cpu(info->CreationTime);
543 570
571 fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
544 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { 572 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
545 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; 573 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
546 fattr->cf_dtype = DT_DIR; 574 fattr->cf_dtype = DT_DIR;
@@ -548,7 +576,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
548 * Server can return wrong NumberOfLinks value for directories 576 * Server can return wrong NumberOfLinks value for directories
549 * when Unix extensions are disabled - fake it. 577 * when Unix extensions are disabled - fake it.
550 */ 578 */
551 fattr->cf_nlink = 2; 579 if (!tcon->unix_ext)
580 fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
552 } else if (fattr->cf_cifsattrs & ATTR_REPARSE) { 581 } else if (fattr->cf_cifsattrs & ATTR_REPARSE) {
553 fattr->cf_mode = S_IFLNK; 582 fattr->cf_mode = S_IFLNK;
554 fattr->cf_dtype = DT_LNK; 583 fattr->cf_dtype = DT_LNK;
@@ -561,11 +590,15 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
561 if (fattr->cf_cifsattrs & ATTR_READONLY) 590 if (fattr->cf_cifsattrs & ATTR_READONLY)
562 fattr->cf_mode &= ~(S_IWUGO); 591 fattr->cf_mode &= ~(S_IWUGO);
563 592
564 fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); 593 /*
565 if (fattr->cf_nlink < 1) { 594 * Don't accept zero nlink from non-unix servers unless
566 cifs_dbg(1, "replacing bogus file nlink value %u\n", 595 * delete is pending. Instead mark it as unknown.
596 */
597 if ((fattr->cf_nlink < 1) && !tcon->unix_ext &&
598 !info->DeletePending) {
599 cifs_dbg(1, "bogus file nlink value %u\n",
567 fattr->cf_nlink); 600 fattr->cf_nlink);
568 fattr->cf_nlink = 1; 601 fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
569 } 602 }
570 } 603 }
571 604
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 42ef03be089f..53a75f3d0179 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -180,6 +180,9 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
180 fattr->cf_dtype = DT_REG; 180 fattr->cf_dtype = DT_REG;
181 } 181 }
182 182
183 /* non-unix readdir doesn't provide nlink */
184 fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
185
183 if (fattr->cf_cifsattrs & ATTR_READONLY) 186 if (fattr->cf_cifsattrs & ATTR_READONLY)
184 fattr->cf_mode &= ~S_IWUGO; 187 fattr->cf_mode &= ~S_IWUGO;
185 188
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 5f99b7f19e78..352358de1d7e 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -32,88 +32,6 @@
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include "cifs_spnego.h" 33#include "cifs_spnego.h"
34 34
35/*
36 * Checks if this is the first smb session to be reconnected after
37 * the socket has been reestablished (so we know whether to use vc 0).
38 * Called while holding the cifs_tcp_ses_lock, so do not block
39 */
40static bool is_first_ses_reconnect(struct cifs_ses *ses)
41{
42 struct list_head *tmp;
43 struct cifs_ses *tmp_ses;
44
45 list_for_each(tmp, &ses->server->smb_ses_list) {
46 tmp_ses = list_entry(tmp, struct cifs_ses,
47 smb_ses_list);
48 if (tmp_ses->need_reconnect == false)
49 return false;
50 }
51 /* could not find a session that was already connected,
52 this must be the first one we are reconnecting */
53 return true;
54}
55
56/*
57 * vc number 0 is treated specially by some servers, and should be the
58 * first one we request. After that we can use vcnumbers up to maxvcs,
59 * one for each smb session (some Windows versions set maxvcs incorrectly
60 * so maxvc=1 can be ignored). If we have too many vcs, we can reuse
61 * any vc but zero (some servers reset the connection on vcnum zero)
62 *
63 */
64static __le16 get_next_vcnum(struct cifs_ses *ses)
65{
66 __u16 vcnum = 0;
67 struct list_head *tmp;
68 struct cifs_ses *tmp_ses;
69 __u16 max_vcs = ses->server->max_vcs;
70 __u16 i;
71 int free_vc_found = 0;
72
73 /* Quoting the MS-SMB specification: "Windows-based SMB servers set this
74 field to one but do not enforce this limit, which allows an SMB client
75 to establish more virtual circuits than allowed by this value ... but
76 other server implementations can enforce this limit." */
77 if (max_vcs < 2)
78 max_vcs = 0xFFFF;
79
80 spin_lock(&cifs_tcp_ses_lock);
81 if ((ses->need_reconnect) && is_first_ses_reconnect(ses))
82 goto get_vc_num_exit; /* vcnum will be zero */
83 for (i = ses->server->srv_count - 1; i < max_vcs; i++) {
84 if (i == 0) /* this is the only connection, use vc 0 */
85 break;
86
87 free_vc_found = 1;
88
89 list_for_each(tmp, &ses->server->smb_ses_list) {
90 tmp_ses = list_entry(tmp, struct cifs_ses,
91 smb_ses_list);
92 if (tmp_ses->vcnum == i) {
93 free_vc_found = 0;
94 break; /* found duplicate, try next vcnum */
95 }
96 }
97 if (free_vc_found)
98 break; /* we found a vcnumber that will work - use it */
99 }
100
101 if (i == 0)
102 vcnum = 0; /* for most common case, ie if one smb session, use
103 vc zero. Also for case when no free vcnum, zero
104 is safest to send (some clients only send zero) */
105 else if (free_vc_found == 0)
106 vcnum = 1; /* we can not reuse vc=0 safely, since some servers
107 reset all uids on that, but 1 is ok. */
108 else
109 vcnum = i;
110 ses->vcnum = vcnum;
111get_vc_num_exit:
112 spin_unlock(&cifs_tcp_ses_lock);
113
114 return cpu_to_le16(vcnum);
115}
116
117static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) 35static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
118{ 36{
119 __u32 capabilities = 0; 37 __u32 capabilities = 0;
@@ -128,7 +46,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
128 CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4, 46 CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4,
129 USHRT_MAX)); 47 USHRT_MAX));
130 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); 48 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
131 pSMB->req.VcNumber = get_next_vcnum(ses); 49 pSMB->req.VcNumber = __constant_cpu_to_le16(1);
132 50
133 /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */ 51 /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
134 52
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 318e8433527c..b2a86e324aac 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -586,7 +586,8 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)
586 586
587 fscache_operation_init(op, NULL, NULL); 587 fscache_operation_init(op, NULL, NULL);
588 op->flags = FSCACHE_OP_MYTHREAD | 588 op->flags = FSCACHE_OP_MYTHREAD |
589 (1 << FSCACHE_OP_WAITING); 589 (1 << FSCACHE_OP_WAITING) |
590 (1 << FSCACHE_OP_UNUSE_COOKIE);
590 591
591 spin_lock(&cookie->lock); 592 spin_lock(&cookie->lock);
592 593
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 62b43b577bfc..b7989f2ab4c4 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -182,6 +182,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
182 struct inode *inode; 182 struct inode *inode;
183 struct dentry *parent; 183 struct dentry *parent;
184 struct fuse_conn *fc; 184 struct fuse_conn *fc;
185 struct fuse_inode *fi;
185 int ret; 186 int ret;
186 187
187 inode = ACCESS_ONCE(entry->d_inode); 188 inode = ACCESS_ONCE(entry->d_inode);
@@ -228,7 +229,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
228 if (!err && !outarg.nodeid) 229 if (!err && !outarg.nodeid)
229 err = -ENOENT; 230 err = -ENOENT;
230 if (!err) { 231 if (!err) {
231 struct fuse_inode *fi = get_fuse_inode(inode); 232 fi = get_fuse_inode(inode);
232 if (outarg.nodeid != get_node_id(inode)) { 233 if (outarg.nodeid != get_node_id(inode)) {
233 fuse_queue_forget(fc, forget, outarg.nodeid, 1); 234 fuse_queue_forget(fc, forget, outarg.nodeid, 1);
234 goto invalid; 235 goto invalid;
@@ -246,8 +247,11 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
246 attr_version); 247 attr_version);
247 fuse_change_entry_timeout(entry, &outarg); 248 fuse_change_entry_timeout(entry, &outarg);
248 } else if (inode) { 249 } else if (inode) {
249 fc = get_fuse_conn(inode); 250 fi = get_fuse_inode(inode);
250 if (fc->readdirplus_auto) { 251 if (flags & LOOKUP_RCU) {
252 if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
253 return -ECHILD;
254 } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
251 parent = dget_parent(entry); 255 parent = dget_parent(entry);
252 fuse_advise_use_readdirplus(parent->d_inode); 256 fuse_advise_use_readdirplus(parent->d_inode);
253 dput(parent); 257 dput(parent);
@@ -259,7 +263,8 @@ out:
259 263
260invalid: 264invalid:
261 ret = 0; 265 ret = 0;
262 if (check_submounts_and_drop(entry) != 0) 266
267 if (!(flags & LOOKUP_RCU) && check_submounts_and_drop(entry) != 0)
263 ret = 1; 268 ret = 1;
264 goto out; 269 goto out;
265} 270}
@@ -1063,6 +1068,8 @@ static int fuse_access(struct inode *inode, int mask)
1063 struct fuse_access_in inarg; 1068 struct fuse_access_in inarg;
1064 int err; 1069 int err;
1065 1070
1071 BUG_ON(mask & MAY_NOT_BLOCK);
1072
1066 if (fc->no_access) 1073 if (fc->no_access)
1067 return 0; 1074 return 0;
1068 1075
@@ -1150,9 +1157,6 @@ static int fuse_permission(struct inode *inode, int mask)
1150 noticed immediately, only after the attribute 1157 noticed immediately, only after the attribute
1151 timeout has expired */ 1158 timeout has expired */
1152 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { 1159 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1153 if (mask & MAY_NOT_BLOCK)
1154 return -ECHILD;
1155
1156 err = fuse_access(inode, mask); 1160 err = fuse_access(inode, mask);
1157 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { 1161 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1158 if (!(inode->i_mode & S_IXUGO)) { 1162 if (!(inode->i_mode & S_IXUGO)) {
@@ -1291,6 +1295,8 @@ static int fuse_direntplus_link(struct file *file,
1291 } 1295 }
1292 1296
1293found: 1297found:
1298 if (fc->readdirplus_auto)
1299 set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
1294 fuse_change_entry_timeout(dentry, o); 1300 fuse_change_entry_timeout(dentry, o);
1295 1301
1296 err = 0; 1302 err = 0;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d409deafc67b..4598345ab87d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2467,6 +2467,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2467{ 2467{
2468 struct fuse_file *ff = file->private_data; 2468 struct fuse_file *ff = file->private_data;
2469 struct inode *inode = file->f_inode; 2469 struct inode *inode = file->f_inode;
2470 struct fuse_inode *fi = get_fuse_inode(inode);
2470 struct fuse_conn *fc = ff->fc; 2471 struct fuse_conn *fc = ff->fc;
2471 struct fuse_req *req; 2472 struct fuse_req *req;
2472 struct fuse_fallocate_in inarg = { 2473 struct fuse_fallocate_in inarg = {
@@ -2484,10 +2485,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2484 2485
2485 if (lock_inode) { 2486 if (lock_inode) {
2486 mutex_lock(&inode->i_mutex); 2487 mutex_lock(&inode->i_mutex);
2487 if (mode & FALLOC_FL_PUNCH_HOLE) 2488 if (mode & FALLOC_FL_PUNCH_HOLE) {
2488 fuse_set_nowrite(inode); 2489 loff_t endbyte = offset + length - 1;
2490 err = filemap_write_and_wait_range(inode->i_mapping,
2491 offset, endbyte);
2492 if (err)
2493 goto out;
2494
2495 fuse_sync_writes(inode);
2496 }
2489 } 2497 }
2490 2498
2499 if (!(mode & FALLOC_FL_KEEP_SIZE))
2500 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2501
2491 req = fuse_get_req_nopages(fc); 2502 req = fuse_get_req_nopages(fc);
2492 if (IS_ERR(req)) { 2503 if (IS_ERR(req)) {
2493 err = PTR_ERR(req); 2504 err = PTR_ERR(req);
@@ -2520,11 +2531,11 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2520 fuse_invalidate_attr(inode); 2531 fuse_invalidate_attr(inode);
2521 2532
2522out: 2533out:
2523 if (lock_inode) { 2534 if (!(mode & FALLOC_FL_KEEP_SIZE))
2524 if (mode & FALLOC_FL_PUNCH_HOLE) 2535 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2525 fuse_release_nowrite(inode); 2536
2537 if (lock_inode)
2526 mutex_unlock(&inode->i_mutex); 2538 mutex_unlock(&inode->i_mutex);
2527 }
2528 2539
2529 return err; 2540 return err;
2530} 2541}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5ced199b50bb..5b9e6f3b6aef 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -115,6 +115,8 @@ struct fuse_inode {
115enum { 115enum {
116 /** Advise readdirplus */ 116 /** Advise readdirplus */
117 FUSE_I_ADVISE_RDPLUS, 117 FUSE_I_ADVISE_RDPLUS,
118 /** Initialized with readdirplus */
119 FUSE_I_INIT_RDPLUS,
118 /** An operation changing file size is in progress */ 120 /** An operation changing file size is in progress */
119 FUSE_I_SIZE_UNSTABLE, 121 FUSE_I_SIZE_UNSTABLE,
120}; 122};
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 64915eeae5a7..ced3257f06e8 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -694,8 +694,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
694 694
695 mark_inode_dirty(inode); 695 mark_inode_dirty(inode);
696 d_instantiate(dentry, inode); 696 d_instantiate(dentry, inode);
697 if (file) 697 if (file) {
698 *opened |= FILE_CREATED;
698 error = finish_open(file, dentry, gfs2_open_common, opened); 699 error = finish_open(file, dentry, gfs2_open_common, opened);
700 }
699 gfs2_glock_dq_uninit(ghs); 701 gfs2_glock_dq_uninit(ghs);
700 gfs2_glock_dq_uninit(ghs + 1); 702 gfs2_glock_dq_uninit(ghs + 1);
701 return error; 703 return error;
diff --git a/fs/namei.c b/fs/namei.c
index 0dc4cbf21f37..645268f23eb6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2656,6 +2656,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2656 int acc_mode; 2656 int acc_mode;
2657 int create_error = 0; 2657 int create_error = 0;
2658 struct dentry *const DENTRY_NOT_SET = (void *) -1UL; 2658 struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
2659 bool excl;
2659 2660
2660 BUG_ON(dentry->d_inode); 2661 BUG_ON(dentry->d_inode);
2661 2662
@@ -2669,10 +2670,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2669 if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) 2670 if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
2670 mode &= ~current_umask(); 2671 mode &= ~current_umask();
2671 2672
2672 if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) { 2673 excl = (open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT);
2674 if (excl)
2673 open_flag &= ~O_TRUNC; 2675 open_flag &= ~O_TRUNC;
2674 *opened |= FILE_CREATED;
2675 }
2676 2676
2677 /* 2677 /*
2678 * Checking write permission is tricky, bacuse we don't know if we are 2678 * Checking write permission is tricky, bacuse we don't know if we are
@@ -2725,12 +2725,6 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2725 goto out; 2725 goto out;
2726 } 2726 }
2727 2727
2728 acc_mode = op->acc_mode;
2729 if (*opened & FILE_CREATED) {
2730 fsnotify_create(dir, dentry);
2731 acc_mode = MAY_OPEN;
2732 }
2733
2734 if (error) { /* returned 1, that is */ 2728 if (error) { /* returned 1, that is */
2735 if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { 2729 if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
2736 error = -EIO; 2730 error = -EIO;
@@ -2740,9 +2734,19 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2740 dput(dentry); 2734 dput(dentry);
2741 dentry = file->f_path.dentry; 2735 dentry = file->f_path.dentry;
2742 } 2736 }
2743 if (create_error && dentry->d_inode == NULL) { 2737 if (*opened & FILE_CREATED)
2744 error = create_error; 2738 fsnotify_create(dir, dentry);
2745 goto out; 2739 if (!dentry->d_inode) {
2740 WARN_ON(*opened & FILE_CREATED);
2741 if (create_error) {
2742 error = create_error;
2743 goto out;
2744 }
2745 } else {
2746 if (excl && !(*opened & FILE_CREATED)) {
2747 error = -EEXIST;
2748 goto out;
2749 }
2746 } 2750 }
2747 goto looked_up; 2751 goto looked_up;
2748 } 2752 }
@@ -2751,6 +2755,12 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2751 * We didn't have the inode before the open, so check open permission 2755 * We didn't have the inode before the open, so check open permission
2752 * here. 2756 * here.
2753 */ 2757 */
2758 acc_mode = op->acc_mode;
2759 if (*opened & FILE_CREATED) {
2760 WARN_ON(!(open_flag & O_CREAT));
2761 fsnotify_create(dir, dentry);
2762 acc_mode = MAY_OPEN;
2763 }
2754 error = may_open(&file->f_path, acc_mode, open_flag); 2764 error = may_open(&file->f_path, acc_mode, open_flag);
2755 if (error) 2765 if (error)
2756 fput(file); 2766 fput(file);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index de434f309af0..02b0df769e2d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1392,6 +1392,9 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
1392{ 1392{
1393 int err; 1393 int err;
1394 1394
1395 if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
1396 *opened |= FILE_CREATED;
1397
1395 err = finish_open(file, dentry, do_open, opened); 1398 err = finish_open(file, dentry, do_open, opened);
1396 if (err) 1399 if (err)
1397 goto out; 1400 goto out;
@@ -1455,7 +1458,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
1455 1458
1456 trace_nfs_atomic_open_enter(dir, ctx, open_flags); 1459 trace_nfs_atomic_open_enter(dir, ctx, open_flags);
1457 nfs_block_sillyrename(dentry->d_parent); 1460 nfs_block_sillyrename(dentry->d_parent);
1458 inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); 1461 inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened);
1459 nfs_unblock_sillyrename(dentry->d_parent); 1462 nfs_unblock_sillyrename(dentry->d_parent);
1460 if (IS_ERR(inode)) { 1463 if (IS_ERR(inode)) {
1461 err = PTR_ERR(inode); 1464 err = PTR_ERR(inode);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index e5b804dd944c..77efaf15ec90 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -19,6 +19,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
19 struct inode *dir; 19 struct inode *dir;
20 unsigned openflags = filp->f_flags; 20 unsigned openflags = filp->f_flags;
21 struct iattr attr; 21 struct iattr attr;
22 int opened = 0;
22 int err; 23 int err;
23 24
24 /* 25 /*
@@ -55,7 +56,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
55 nfs_wb_all(inode); 56 nfs_wb_all(inode);
56 } 57 }
57 58
58 inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); 59 inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened);
59 if (IS_ERR(inode)) { 60 if (IS_ERR(inode)) {
60 err = PTR_ERR(inode); 61 err = PTR_ERR(inode);
61 switch (err) { 62 switch (err) {
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 95604f64cab8..c7c295e556ed 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -185,6 +185,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
185 if (status) 185 if (status)
186 goto out_put; 186 goto out_put;
187 187
188 smp_wmb();
188 ds->ds_clp = clp; 189 ds->ds_clp = clp;
189 dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); 190 dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
190out: 191out:
@@ -801,34 +802,35 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
801 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; 802 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
802 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; 803 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
803 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); 804 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
804 805 struct nfs4_pnfs_ds *ret = ds;
805 if (filelayout_test_devid_unavailable(devid))
806 return NULL;
807 806
808 if (ds == NULL) { 807 if (ds == NULL) {
809 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", 808 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
810 __func__, ds_idx); 809 __func__, ds_idx);
811 filelayout_mark_devid_invalid(devid); 810 filelayout_mark_devid_invalid(devid);
812 return NULL; 811 goto out;
813 } 812 }
813 smp_rmb();
814 if (ds->ds_clp) 814 if (ds->ds_clp)
815 return ds; 815 goto out_test_devid;
816 816
817 if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { 817 if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
818 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); 818 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
819 int err; 819 int err;
820 820
821 err = nfs4_ds_connect(s, ds); 821 err = nfs4_ds_connect(s, ds);
822 if (err) { 822 if (err)
823 nfs4_mark_deviceid_unavailable(devid); 823 nfs4_mark_deviceid_unavailable(devid);
824 ds = NULL;
825 }
826 nfs4_clear_ds_conn_bit(ds); 824 nfs4_clear_ds_conn_bit(ds);
827 } else { 825 } else {
828 /* Either ds is connected, or ds is NULL */ 826 /* Either ds is connected, or ds is NULL */
829 nfs4_wait_ds_connect(ds); 827 nfs4_wait_ds_connect(ds);
830 } 828 }
831 return ds; 829out_test_devid:
830 if (filelayout_test_devid_unavailable(devid))
831 ret = NULL;
832out:
833 return ret;
832} 834}
833 835
834module_param(dataserver_retrans, uint, 0644); 836module_param(dataserver_retrans, uint, 0644);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 989bb9d3074d..d53d6785cba2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -912,6 +912,7 @@ struct nfs4_opendata {
912 struct iattr attrs; 912 struct iattr attrs;
913 unsigned long timestamp; 913 unsigned long timestamp;
914 unsigned int rpc_done : 1; 914 unsigned int rpc_done : 1;
915 unsigned int file_created : 1;
915 unsigned int is_recover : 1; 916 unsigned int is_recover : 1;
916 int rpc_status; 917 int rpc_status;
917 int cancelled; 918 int cancelled;
@@ -1946,8 +1947,13 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1946 1947
1947 nfs_fattr_map_and_free_names(server, &data->f_attr); 1948 nfs_fattr_map_and_free_names(server, &data->f_attr);
1948 1949
1949 if (o_arg->open_flags & O_CREAT) 1950 if (o_arg->open_flags & O_CREAT) {
1950 update_changeattr(dir, &o_res->cinfo); 1951 update_changeattr(dir, &o_res->cinfo);
1952 if (o_arg->open_flags & O_EXCL)
1953 data->file_created = 1;
1954 else if (o_res->cinfo.before != o_res->cinfo.after)
1955 data->file_created = 1;
1956 }
1951 if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) 1957 if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
1952 server->caps &= ~NFS_CAP_POSIX_LOCK; 1958 server->caps &= ~NFS_CAP_POSIX_LOCK;
1953 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { 1959 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
@@ -2191,7 +2197,8 @@ static int _nfs4_do_open(struct inode *dir,
2191 struct nfs_open_context *ctx, 2197 struct nfs_open_context *ctx,
2192 int flags, 2198 int flags,
2193 struct iattr *sattr, 2199 struct iattr *sattr,
2194 struct nfs4_label *label) 2200 struct nfs4_label *label,
2201 int *opened)
2195{ 2202{
2196 struct nfs4_state_owner *sp; 2203 struct nfs4_state_owner *sp;
2197 struct nfs4_state *state = NULL; 2204 struct nfs4_state *state = NULL;
@@ -2261,6 +2268,8 @@ static int _nfs4_do_open(struct inode *dir,
2261 nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); 2268 nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
2262 } 2269 }
2263 } 2270 }
2271 if (opendata->file_created)
2272 *opened |= FILE_CREATED;
2264 2273
2265 if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) 2274 if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server))
2266 *ctx_th = opendata->f_attr.mdsthreshold; 2275 *ctx_th = opendata->f_attr.mdsthreshold;
@@ -2289,7 +2298,8 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
2289 struct nfs_open_context *ctx, 2298 struct nfs_open_context *ctx,
2290 int flags, 2299 int flags,
2291 struct iattr *sattr, 2300 struct iattr *sattr,
2292 struct nfs4_label *label) 2301 struct nfs4_label *label,
2302 int *opened)
2293{ 2303{
2294 struct nfs_server *server = NFS_SERVER(dir); 2304 struct nfs_server *server = NFS_SERVER(dir);
2295 struct nfs4_exception exception = { }; 2305 struct nfs4_exception exception = { };
@@ -2297,7 +2307,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
2297 int status; 2307 int status;
2298 2308
2299 do { 2309 do {
2300 status = _nfs4_do_open(dir, ctx, flags, sattr, label); 2310 status = _nfs4_do_open(dir, ctx, flags, sattr, label, opened);
2301 res = ctx->state; 2311 res = ctx->state;
2302 trace_nfs4_open_file(ctx, flags, status); 2312 trace_nfs4_open_file(ctx, flags, status);
2303 if (status == 0) 2313 if (status == 0)
@@ -2659,7 +2669,8 @@ out:
2659} 2669}
2660 2670
2661static struct inode * 2671static struct inode *
2662nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr) 2672nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx,
2673 int open_flags, struct iattr *attr, int *opened)
2663{ 2674{
2664 struct nfs4_state *state; 2675 struct nfs4_state *state;
2665 struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL; 2676 struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL;
@@ -2667,7 +2678,7 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags
2667 label = nfs4_label_init_security(dir, ctx->dentry, attr, &l); 2678 label = nfs4_label_init_security(dir, ctx->dentry, attr, &l);
2668 2679
2669 /* Protect against concurrent sillydeletes */ 2680 /* Protect against concurrent sillydeletes */
2670 state = nfs4_do_open(dir, ctx, open_flags, attr, label); 2681 state = nfs4_do_open(dir, ctx, open_flags, attr, label, opened);
2671 2682
2672 nfs4_label_release_security(label); 2683 nfs4_label_release_security(label);
2673 2684
@@ -3332,6 +3343,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
3332 struct nfs4_label l, *ilabel = NULL; 3343 struct nfs4_label l, *ilabel = NULL;
3333 struct nfs_open_context *ctx; 3344 struct nfs_open_context *ctx;
3334 struct nfs4_state *state; 3345 struct nfs4_state *state;
3346 int opened = 0;
3335 int status = 0; 3347 int status = 0;
3336 3348
3337 ctx = alloc_nfs_open_context(dentry, FMODE_READ); 3349 ctx = alloc_nfs_open_context(dentry, FMODE_READ);
@@ -3341,7 +3353,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
3341 ilabel = nfs4_label_init_security(dir, dentry, sattr, &l); 3353 ilabel = nfs4_label_init_security(dir, dentry, sattr, &l);
3342 3354
3343 sattr->ia_mode &= ~current_umask(); 3355 sattr->ia_mode &= ~current_umask();
3344 state = nfs4_do_open(dir, ctx, flags, sattr, ilabel); 3356 state = nfs4_do_open(dir, ctx, flags, sattr, ilabel, &opened);
3345 if (IS_ERR(state)) { 3357 if (IS_ERR(state)) {
3346 status = PTR_ERR(state); 3358 status = PTR_ERR(state);
3347 goto out; 3359 goto out;
@@ -7564,8 +7576,10 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
7564{ 7576{
7565 int err; 7577 int err;
7566 struct page *page; 7578 struct page *page;
7567 rpc_authflavor_t flavor; 7579 rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR;
7568 struct nfs4_secinfo_flavors *flavors; 7580 struct nfs4_secinfo_flavors *flavors;
7581 struct nfs4_secinfo4 *secinfo;
7582 int i;
7569 7583
7570 page = alloc_page(GFP_KERNEL); 7584 page = alloc_page(GFP_KERNEL);
7571 if (!page) { 7585 if (!page) {
@@ -7587,9 +7601,31 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
7587 if (err) 7601 if (err)
7588 goto out_freepage; 7602 goto out_freepage;
7589 7603
7590 flavor = nfs_find_best_sec(flavors); 7604 for (i = 0; i < flavors->num_flavors; i++) {
7591 if (err == 0) 7605 secinfo = &flavors->flavors[i];
7592 err = nfs4_lookup_root_sec(server, fhandle, info, flavor); 7606
7607 switch (secinfo->flavor) {
7608 case RPC_AUTH_NULL:
7609 case RPC_AUTH_UNIX:
7610 case RPC_AUTH_GSS:
7611 flavor = rpcauth_get_pseudoflavor(secinfo->flavor,
7612 &secinfo->flavor_info);
7613 break;
7614 default:
7615 flavor = RPC_AUTH_MAXFLAVOR;
7616 break;
7617 }
7618
7619 if (flavor != RPC_AUTH_MAXFLAVOR) {
7620 err = nfs4_lookup_root_sec(server, fhandle,
7621 info, flavor);
7622 if (!err)
7623 break;
7624 }
7625 }
7626
7627 if (flavor == RPC_AUTH_MAXFLAVOR)
7628 err = -EPERM;
7593 7629
7594out_freepage: 7630out_freepage:
7595 put_page(page); 7631 put_page(page);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0ba679866e50..da276640f776 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -94,6 +94,7 @@ void nilfs_forget_buffer(struct buffer_head *bh)
94 clear_buffer_nilfs_volatile(bh); 94 clear_buffer_nilfs_volatile(bh);
95 clear_buffer_nilfs_checked(bh); 95 clear_buffer_nilfs_checked(bh);
96 clear_buffer_nilfs_redirected(bh); 96 clear_buffer_nilfs_redirected(bh);
97 clear_buffer_async_write(bh);
97 clear_buffer_dirty(bh); 98 clear_buffer_dirty(bh);
98 if (nilfs_page_buffers_clean(page)) 99 if (nilfs_page_buffers_clean(page))
99 __nilfs_clear_page_dirty(page); 100 __nilfs_clear_page_dirty(page);
@@ -429,6 +430,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent)
429 "discard block %llu, size %zu", 430 "discard block %llu, size %zu",
430 (u64)bh->b_blocknr, bh->b_size); 431 (u64)bh->b_blocknr, bh->b_size);
431 } 432 }
433 clear_buffer_async_write(bh);
432 clear_buffer_dirty(bh); 434 clear_buffer_dirty(bh);
433 clear_buffer_nilfs_volatile(bh); 435 clear_buffer_nilfs_volatile(bh);
434 clear_buffer_nilfs_checked(bh); 436 clear_buffer_nilfs_checked(bh);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index bd88a7461063..9f6b486b6c01 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -665,7 +665,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
665 665
666 bh = head = page_buffers(page); 666 bh = head = page_buffers(page);
667 do { 667 do {
668 if (!buffer_dirty(bh)) 668 if (!buffer_dirty(bh) || buffer_async_write(bh))
669 continue; 669 continue;
670 get_bh(bh); 670 get_bh(bh);
671 list_add_tail(&bh->b_assoc_buffers, listp); 671 list_add_tail(&bh->b_assoc_buffers, listp);
@@ -699,7 +699,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
699 for (i = 0; i < pagevec_count(&pvec); i++) { 699 for (i = 0; i < pagevec_count(&pvec); i++) {
700 bh = head = page_buffers(pvec.pages[i]); 700 bh = head = page_buffers(pvec.pages[i]);
701 do { 701 do {
702 if (buffer_dirty(bh)) { 702 if (buffer_dirty(bh) &&
703 !buffer_async_write(bh)) {
703 get_bh(bh); 704 get_bh(bh);
704 list_add_tail(&bh->b_assoc_buffers, 705 list_add_tail(&bh->b_assoc_buffers,
705 listp); 706 listp);
@@ -1579,6 +1580,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
1579 1580
1580 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1581 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1581 b_assoc_buffers) { 1582 b_assoc_buffers) {
1583 set_buffer_async_write(bh);
1582 if (bh->b_page != bd_page) { 1584 if (bh->b_page != bd_page) {
1583 if (bd_page) { 1585 if (bd_page) {
1584 lock_page(bd_page); 1586 lock_page(bd_page);
@@ -1592,6 +1594,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
1592 1594
1593 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1595 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1594 b_assoc_buffers) { 1596 b_assoc_buffers) {
1597 set_buffer_async_write(bh);
1595 if (bh == segbuf->sb_super_root) { 1598 if (bh == segbuf->sb_super_root) {
1596 if (bh->b_page != bd_page) { 1599 if (bh->b_page != bd_page) {
1597 lock_page(bd_page); 1600 lock_page(bd_page);
@@ -1677,6 +1680,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
1677 list_for_each_entry(segbuf, logs, sb_list) { 1680 list_for_each_entry(segbuf, logs, sb_list) {
1678 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1681 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1679 b_assoc_buffers) { 1682 b_assoc_buffers) {
1683 clear_buffer_async_write(bh);
1680 if (bh->b_page != bd_page) { 1684 if (bh->b_page != bd_page) {
1681 if (bd_page) 1685 if (bd_page)
1682 end_page_writeback(bd_page); 1686 end_page_writeback(bd_page);
@@ -1686,6 +1690,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
1686 1690
1687 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1691 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1688 b_assoc_buffers) { 1692 b_assoc_buffers) {
1693 clear_buffer_async_write(bh);
1689 if (bh == segbuf->sb_super_root) { 1694 if (bh == segbuf->sb_super_root) {
1690 if (bh->b_page != bd_page) { 1695 if (bh->b_page != bd_page) {
1691 end_page_writeback(bd_page); 1696 end_page_writeback(bd_page);
@@ -1755,6 +1760,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1755 b_assoc_buffers) { 1760 b_assoc_buffers) {
1756 set_buffer_uptodate(bh); 1761 set_buffer_uptodate(bh);
1757 clear_buffer_dirty(bh); 1762 clear_buffer_dirty(bh);
1763 clear_buffer_async_write(bh);
1758 if (bh->b_page != bd_page) { 1764 if (bh->b_page != bd_page) {
1759 if (bd_page) 1765 if (bd_page)
1760 end_page_writeback(bd_page); 1766 end_page_writeback(bd_page);
@@ -1776,6 +1782,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1776 b_assoc_buffers) { 1782 b_assoc_buffers) {
1777 set_buffer_uptodate(bh); 1783 set_buffer_uptodate(bh);
1778 clear_buffer_dirty(bh); 1784 clear_buffer_dirty(bh);
1785 clear_buffer_async_write(bh);
1779 clear_buffer_delay(bh); 1786 clear_buffer_delay(bh);
1780 clear_buffer_nilfs_volatile(bh); 1787 clear_buffer_nilfs_volatile(bh);
1781 clear_buffer_nilfs_redirected(bh); 1788 clear_buffer_nilfs_redirected(bh);
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index ef999729e274..0d3a97d2d5f6 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -70,9 +70,10 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
70 */ 70 */
71 if (inode == NULL) { 71 if (inode == NULL) {
72 unsigned long gen = (unsigned long) dentry->d_fsdata; 72 unsigned long gen = (unsigned long) dentry->d_fsdata;
73 unsigned long pgen = 73 unsigned long pgen;
74 OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; 74 spin_lock(&dentry->d_lock);
75 75 pgen = OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
76 spin_unlock(&dentry->d_lock);
76 trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len, 77 trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len,
77 dentry->d_name.name, 78 dentry->d_name.name,
78 pgen, gen); 79 pgen, gen);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 121da2dc3be8..d4e81e4a9b04 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1924,7 +1924,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1924{ 1924{
1925 int tmp, hangup_needed = 0; 1925 int tmp, hangup_needed = 0;
1926 struct ocfs2_super *osb = NULL; 1926 struct ocfs2_super *osb = NULL;
1927 char nodestr[8]; 1927 char nodestr[12];
1928 1928
1929 trace_ocfs2_dismount_volume(sb); 1929 trace_ocfs2_dismount_volume(sb);
1930 1930
diff --git a/fs/open.c b/fs/open.c
index 2a731b0d08bc..d420331ca32a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -744,14 +744,24 @@ cleanup_file:
744 744
745/** 745/**
746 * finish_open - finish opening a file 746 * finish_open - finish opening a file
747 * @od: opaque open data 747 * @file: file pointer
748 * @dentry: pointer to dentry 748 * @dentry: pointer to dentry
749 * @open: open callback 749 * @open: open callback
750 * @opened: state of open
750 * 751 *
751 * This can be used to finish opening a file passed to i_op->atomic_open(). 752 * This can be used to finish opening a file passed to i_op->atomic_open().
752 * 753 *
753 * If the open callback is set to NULL, then the standard f_op->open() 754 * If the open callback is set to NULL, then the standard f_op->open()
754 * filesystem callback is substituted. 755 * filesystem callback is substituted.
756 *
757 * NB: the dentry reference is _not_ consumed. If, for example, the dentry is
758 * the return value of d_splice_alias(), then the caller needs to perform dput()
759 * on it after finish_open().
760 *
761 * On successful return @file is a fully instantiated open file. After this, if
762 * an error occurs in ->atomic_open(), it needs to clean up with fput().
763 *
764 * Returns zero on success or -errno if the open failed.
755 */ 765 */
756int finish_open(struct file *file, struct dentry *dentry, 766int finish_open(struct file *file, struct dentry *dentry,
757 int (*open)(struct inode *, struct file *), 767 int (*open)(struct inode *, struct file *),
@@ -772,11 +782,16 @@ EXPORT_SYMBOL(finish_open);
772/** 782/**
773 * finish_no_open - finish ->atomic_open() without opening the file 783 * finish_no_open - finish ->atomic_open() without opening the file
774 * 784 *
775 * @od: opaque open data 785 * @file: file pointer
776 * @dentry: dentry or NULL (as returned from ->lookup()) 786 * @dentry: dentry or NULL (as returned from ->lookup())
777 * 787 *
778 * This can be used to set the result of a successful lookup in ->atomic_open(). 788 * This can be used to set the result of a successful lookup in ->atomic_open().
779 * The filesystem's atomic_open() method shall return NULL after calling this. 789 *
790 * NB: unlike finish_open() this function does consume the dentry reference and
791 * the caller need not dput() it.
792 *
793 * Returns "1" which must be the return value of ->atomic_open() after having
794 * called this function.
780 */ 795 */
781int finish_no_open(struct file *file, struct dentry *dentry) 796int finish_no_open(struct file *file, struct dentry *dentry)
782{ 797{
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 4ffb7ab5e397..b8e93a40a5d3 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -168,7 +168,7 @@ static int pstore_decompress(void *in, void *out, size_t inlen, size_t outlen)
168 int err, ret; 168 int err, ret;
169 169
170 ret = -EIO; 170 ret = -EIO;
171 err = zlib_inflateInit(&stream); 171 err = zlib_inflateInit2(&stream, WINDOW_BITS);
172 if (err != Z_OK) 172 if (err != Z_OK)
173 goto error; 173 goto error;
174 174
@@ -195,8 +195,29 @@ error:
195static void allocate_buf_for_compression(void) 195static void allocate_buf_for_compression(void)
196{ 196{
197 size_t size; 197 size_t size;
198 size_t cmpr;
199
200 switch (psinfo->bufsize) {
201 /* buffer range for efivars */
202 case 1000 ... 2000:
203 cmpr = 56;
204 break;
205 case 2001 ... 3000:
206 cmpr = 54;
207 break;
208 case 3001 ... 3999:
209 cmpr = 52;
210 break;
211 /* buffer range for nvram, erst */
212 case 4000 ... 10000:
213 cmpr = 45;
214 break;
215 default:
216 cmpr = 60;
217 break;
218 }
198 219
199 big_oops_buf_sz = (psinfo->bufsize * 100) / 45; 220 big_oops_buf_sz = (psinfo->bufsize * 100) / cmpr;
200 big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); 221 big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
201 if (big_oops_buf) { 222 if (big_oops_buf) {
202 size = max(zlib_deflate_workspacesize(WINDOW_BITS, MEM_LEVEL), 223 size = max(zlib_deflate_workspacesize(WINDOW_BITS, MEM_LEVEL),
@@ -295,10 +316,6 @@ static void pstore_dump(struct kmsg_dumper *dumper,
295 compressed = true; 316 compressed = true;
296 total_len = zipped_len; 317 total_len = zipped_len;
297 } else { 318 } else {
298 pr_err("pstore: compression failed for Part %d"
299 " returned %d\n", part, zipped_len);
300 pr_err("pstore: Capture uncompressed"
301 " oops/panic report of Part %d\n", part);
302 compressed = false; 319 compressed = false;
303 total_len = copy_kmsg_to_buffer(hsize, len); 320 total_len = copy_kmsg_to_buffer(hsize, len);
304 } 321 }
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 73feacc49b2e..fd777032c2ba 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1163,21 +1163,6 @@ static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1163 return NULL; 1163 return NULL;
1164} 1164}
1165 1165
1166static int newer_jl_done(struct reiserfs_journal_cnode *cn)
1167{
1168 struct super_block *sb = cn->sb;
1169 b_blocknr_t blocknr = cn->blocknr;
1170
1171 cn = cn->hprev;
1172 while (cn) {
1173 if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
1174 atomic_read(&cn->jlist->j_commit_left) != 0)
1175 return 0;
1176 cn = cn->hprev;
1177 }
1178 return 1;
1179}
1180
1181static void remove_journal_hash(struct super_block *, 1166static void remove_journal_hash(struct super_block *,
1182 struct reiserfs_journal_cnode **, 1167 struct reiserfs_journal_cnode **,
1183 struct reiserfs_journal_list *, unsigned long, 1168 struct reiserfs_journal_list *, unsigned long,
@@ -1353,7 +1338,6 @@ static int flush_journal_list(struct super_block *s,
1353 reiserfs_warning(s, "clm-2048", "called with wcount %d", 1338 reiserfs_warning(s, "clm-2048", "called with wcount %d",
1354 atomic_read(&journal->j_wcount)); 1339 atomic_read(&journal->j_wcount));
1355 } 1340 }
1356 BUG_ON(jl->j_trans_id == 0);
1357 1341
1358 /* if flushall == 0, the lock is already held */ 1342 /* if flushall == 0, the lock is already held */
1359 if (flushall) { 1343 if (flushall) {
@@ -1593,31 +1577,6 @@ static int flush_journal_list(struct super_block *s,
1593 return err; 1577 return err;
1594} 1578}
1595 1579
1596static int test_transaction(struct super_block *s,
1597 struct reiserfs_journal_list *jl)
1598{
1599 struct reiserfs_journal_cnode *cn;
1600
1601 if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
1602 return 1;
1603
1604 cn = jl->j_realblock;
1605 while (cn) {
1606 /* if the blocknr == 0, this has been cleared from the hash,
1607 ** skip it
1608 */
1609 if (cn->blocknr == 0) {
1610 goto next;
1611 }
1612 if (cn->bh && !newer_jl_done(cn))
1613 return 0;
1614 next:
1615 cn = cn->next;
1616 cond_resched();
1617 }
1618 return 0;
1619}
1620
1621static int write_one_transaction(struct super_block *s, 1580static int write_one_transaction(struct super_block *s,
1622 struct reiserfs_journal_list *jl, 1581 struct reiserfs_journal_list *jl,
1623 struct buffer_chunk *chunk) 1582 struct buffer_chunk *chunk)
@@ -1805,6 +1764,8 @@ static int flush_used_journal_lists(struct super_block *s,
1805 break; 1764 break;
1806 tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); 1765 tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
1807 } 1766 }
1767 get_journal_list(jl);
1768 get_journal_list(flush_jl);
1808 /* try to find a group of blocks we can flush across all the 1769 /* try to find a group of blocks we can flush across all the
1809 ** transactions, but only bother if we've actually spanned 1770 ** transactions, but only bother if we've actually spanned
1810 ** across multiple lists 1771 ** across multiple lists
@@ -1813,6 +1774,8 @@ static int flush_used_journal_lists(struct super_block *s,
1813 ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); 1774 ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
1814 } 1775 }
1815 flush_journal_list(s, flush_jl, 1); 1776 flush_journal_list(s, flush_jl, 1);
1777 put_journal_list(s, flush_jl);
1778 put_journal_list(s, jl);
1816 return 0; 1779 return 0;
1817} 1780}
1818 1781
@@ -3868,27 +3831,6 @@ int reiserfs_prepare_for_journal(struct super_block *sb,
3868 return 1; 3831 return 1;
3869} 3832}
3870 3833
3871static void flush_old_journal_lists(struct super_block *s)
3872{
3873 struct reiserfs_journal *journal = SB_JOURNAL(s);
3874 struct reiserfs_journal_list *jl;
3875 struct list_head *entry;
3876 time_t now = get_seconds();
3877
3878 while (!list_empty(&journal->j_journal_list)) {
3879 entry = journal->j_journal_list.next;
3880 jl = JOURNAL_LIST_ENTRY(entry);
3881 /* this check should always be run, to send old lists to disk */
3882 if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
3883 atomic_read(&jl->j_commit_left) == 0 &&
3884 test_transaction(s, jl)) {
3885 flush_used_journal_lists(s, jl);
3886 } else {
3887 break;
3888 }
3889 }
3890}
3891
3892/* 3834/*
3893** long and ugly. If flush, will not return until all commit 3835** long and ugly. If flush, will not return until all commit
3894** blocks and all real buffers in the trans are on disk. 3836** blocks and all real buffers in the trans are on disk.
@@ -4232,7 +4174,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4232 } 4174 }
4233 } 4175 }
4234 } 4176 }
4235 flush_old_journal_lists(sb);
4236 4177
4237 journal->j_current_jl->j_list_bitmap = 4178 journal->j_current_jl->j_list_bitmap =
4238 get_list_bitmap(sb, journal->j_current_jl); 4179 get_list_bitmap(sb, journal->j_current_jl);
diff --git a/fs/super.c b/fs/super.c
index 3a96c9783a8b..0225c20f8770 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -264,6 +264,8 @@ out_free_sb:
264 */ 264 */
265static inline void destroy_super(struct super_block *s) 265static inline void destroy_super(struct super_block *s)
266{ 266{
267 list_lru_destroy(&s->s_dentry_lru);
268 list_lru_destroy(&s->s_inode_lru);
267#ifdef CONFIG_SMP 269#ifdef CONFIG_SMP
268 free_percpu(s->s_files); 270 free_percpu(s->s_files);
269#endif 271#endif
@@ -323,8 +325,6 @@ void deactivate_locked_super(struct super_block *s)
323 325
324 /* caches are now gone, we can safely kill the shrinker now */ 326 /* caches are now gone, we can safely kill the shrinker now */
325 unregister_shrinker(&s->s_shrink); 327 unregister_shrinker(&s->s_shrink);
326 list_lru_destroy(&s->s_dentry_lru);
327 list_lru_destroy(&s->s_inode_lru);
328 328
329 put_filesystem(fs); 329 put_filesystem(fs);
330 put_super(s); 330 put_super(s);
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index d0c6a007ce83..eda10959714f 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -487,6 +487,7 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent)
487 sbi->s_sb = sb; 487 sbi->s_sb = sb;
488 sbi->s_block_base = 0; 488 sbi->s_block_base = 0;
489 sbi->s_type = FSTYPE_V7; 489 sbi->s_type = FSTYPE_V7;
490 mutex_init(&sbi->s_lock);
490 sb->s_fs_info = sbi; 491 sb->s_fs_info = sbi;
491 492
492 sb_set_blocksize(sb, 512); 493 sb_set_blocksize(sb, 512);
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 7e5aae4bf46f..6eaf5edf1ea1 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -30,18 +30,17 @@ void udf_free_inode(struct inode *inode)
30{ 30{
31 struct super_block *sb = inode->i_sb; 31 struct super_block *sb = inode->i_sb;
32 struct udf_sb_info *sbi = UDF_SB(sb); 32 struct udf_sb_info *sbi = UDF_SB(sb);
33 struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb);
33 34
34 mutex_lock(&sbi->s_alloc_mutex); 35 if (lvidiu) {
35 if (sbi->s_lvid_bh) { 36 mutex_lock(&sbi->s_alloc_mutex);
36 struct logicalVolIntegrityDescImpUse *lvidiu =
37 udf_sb_lvidiu(sbi);
38 if (S_ISDIR(inode->i_mode)) 37 if (S_ISDIR(inode->i_mode))
39 le32_add_cpu(&lvidiu->numDirs, -1); 38 le32_add_cpu(&lvidiu->numDirs, -1);
40 else 39 else
41 le32_add_cpu(&lvidiu->numFiles, -1); 40 le32_add_cpu(&lvidiu->numFiles, -1);
42 udf_updated_lvid(sb); 41 udf_updated_lvid(sb);
42 mutex_unlock(&sbi->s_alloc_mutex);
43 } 43 }
44 mutex_unlock(&sbi->s_alloc_mutex);
45 44
46 udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1); 45 udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1);
47} 46}
@@ -55,6 +54,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
55 uint32_t start = UDF_I(dir)->i_location.logicalBlockNum; 54 uint32_t start = UDF_I(dir)->i_location.logicalBlockNum;
56 struct udf_inode_info *iinfo; 55 struct udf_inode_info *iinfo;
57 struct udf_inode_info *dinfo = UDF_I(dir); 56 struct udf_inode_info *dinfo = UDF_I(dir);
57 struct logicalVolIntegrityDescImpUse *lvidiu;
58 58
59 inode = new_inode(sb); 59 inode = new_inode(sb);
60 60
@@ -92,12 +92,10 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
92 return NULL; 92 return NULL;
93 } 93 }
94 94
95 if (sbi->s_lvid_bh) { 95 lvidiu = udf_sb_lvidiu(sb);
96 struct logicalVolIntegrityDescImpUse *lvidiu; 96 if (lvidiu) {
97
98 iinfo->i_unique = lvid_get_unique_id(sb); 97 iinfo->i_unique = lvid_get_unique_id(sb);
99 mutex_lock(&sbi->s_alloc_mutex); 98 mutex_lock(&sbi->s_alloc_mutex);
100 lvidiu = udf_sb_lvidiu(sbi);
101 if (S_ISDIR(mode)) 99 if (S_ISDIR(mode))
102 le32_add_cpu(&lvidiu->numDirs, 1); 100 le32_add_cpu(&lvidiu->numDirs, 1);
103 else 101 else
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 839a2bad7f45..91219385691d 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -94,13 +94,25 @@ static unsigned int udf_count_free(struct super_block *);
94static int udf_statfs(struct dentry *, struct kstatfs *); 94static int udf_statfs(struct dentry *, struct kstatfs *);
95static int udf_show_options(struct seq_file *, struct dentry *); 95static int udf_show_options(struct seq_file *, struct dentry *);
96 96
97struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) 97struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb)
98{ 98{
99 struct logicalVolIntegrityDesc *lvid = 99 struct logicalVolIntegrityDesc *lvid;
100 (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data; 100 unsigned int partnum;
101 __u32 number_of_partitions = le32_to_cpu(lvid->numOfPartitions); 101 unsigned int offset;
102 __u32 offset = number_of_partitions * 2 * 102
103 sizeof(uint32_t)/sizeof(uint8_t); 103 if (!UDF_SB(sb)->s_lvid_bh)
104 return NULL;
105 lvid = (struct logicalVolIntegrityDesc *)UDF_SB(sb)->s_lvid_bh->b_data;
106 partnum = le32_to_cpu(lvid->numOfPartitions);
107 if ((sb->s_blocksize - sizeof(struct logicalVolIntegrityDescImpUse) -
108 offsetof(struct logicalVolIntegrityDesc, impUse)) /
109 (2 * sizeof(uint32_t)) < partnum) {
110 udf_err(sb, "Logical volume integrity descriptor corrupted "
111 "(numOfPartitions = %u)!\n", partnum);
112 return NULL;
113 }
114 /* The offset is to skip freeSpaceTable and sizeTable arrays */
115 offset = partnum * 2 * sizeof(uint32_t);
104 return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]); 116 return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]);
105} 117}
106 118
@@ -629,9 +641,10 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
629 struct udf_options uopt; 641 struct udf_options uopt;
630 struct udf_sb_info *sbi = UDF_SB(sb); 642 struct udf_sb_info *sbi = UDF_SB(sb);
631 int error = 0; 643 int error = 0;
644 struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb);
632 645
633 if (sbi->s_lvid_bh) { 646 if (lvidiu) {
634 int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev); 647 int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev);
635 if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY)) 648 if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY))
636 return -EACCES; 649 return -EACCES;
637 } 650 }
@@ -1905,11 +1918,12 @@ static void udf_open_lvid(struct super_block *sb)
1905 1918
1906 if (!bh) 1919 if (!bh)
1907 return; 1920 return;
1908
1909 mutex_lock(&sbi->s_alloc_mutex);
1910 lvid = (struct logicalVolIntegrityDesc *)bh->b_data; 1921 lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
1911 lvidiu = udf_sb_lvidiu(sbi); 1922 lvidiu = udf_sb_lvidiu(sb);
1923 if (!lvidiu)
1924 return;
1912 1925
1926 mutex_lock(&sbi->s_alloc_mutex);
1913 lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; 1927 lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
1914 lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; 1928 lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
1915 udf_time_to_disk_stamp(&lvid->recordingDateAndTime, 1929 udf_time_to_disk_stamp(&lvid->recordingDateAndTime,
@@ -1937,10 +1951,12 @@ static void udf_close_lvid(struct super_block *sb)
1937 1951
1938 if (!bh) 1952 if (!bh)
1939 return; 1953 return;
1954 lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
1955 lvidiu = udf_sb_lvidiu(sb);
1956 if (!lvidiu)
1957 return;
1940 1958
1941 mutex_lock(&sbi->s_alloc_mutex); 1959 mutex_lock(&sbi->s_alloc_mutex);
1942 lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
1943 lvidiu = udf_sb_lvidiu(sbi);
1944 lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; 1960 lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
1945 lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; 1961 lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
1946 udf_time_to_disk_stamp(&lvid->recordingDateAndTime, CURRENT_TIME); 1962 udf_time_to_disk_stamp(&lvid->recordingDateAndTime, CURRENT_TIME);
@@ -2093,15 +2109,19 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2093 2109
2094 if (sbi->s_lvid_bh) { 2110 if (sbi->s_lvid_bh) {
2095 struct logicalVolIntegrityDescImpUse *lvidiu = 2111 struct logicalVolIntegrityDescImpUse *lvidiu =
2096 udf_sb_lvidiu(sbi); 2112 udf_sb_lvidiu(sb);
2097 uint16_t minUDFReadRev = le16_to_cpu(lvidiu->minUDFReadRev); 2113 uint16_t minUDFReadRev;
2098 uint16_t minUDFWriteRev = le16_to_cpu(lvidiu->minUDFWriteRev); 2114 uint16_t minUDFWriteRev;
2099 /* uint16_t maxUDFWriteRev =
2100 le16_to_cpu(lvidiu->maxUDFWriteRev); */
2101 2115
2116 if (!lvidiu) {
2117 ret = -EINVAL;
2118 goto error_out;
2119 }
2120 minUDFReadRev = le16_to_cpu(lvidiu->minUDFReadRev);
2121 minUDFWriteRev = le16_to_cpu(lvidiu->minUDFWriteRev);
2102 if (minUDFReadRev > UDF_MAX_READ_VERSION) { 2122 if (minUDFReadRev > UDF_MAX_READ_VERSION) {
2103 udf_err(sb, "minUDFReadRev=%x (max is %x)\n", 2123 udf_err(sb, "minUDFReadRev=%x (max is %x)\n",
2104 le16_to_cpu(lvidiu->minUDFReadRev), 2124 minUDFReadRev,
2105 UDF_MAX_READ_VERSION); 2125 UDF_MAX_READ_VERSION);
2106 ret = -EINVAL; 2126 ret = -EINVAL;
2107 goto error_out; 2127 goto error_out;
@@ -2265,11 +2285,7 @@ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf)
2265 struct logicalVolIntegrityDescImpUse *lvidiu; 2285 struct logicalVolIntegrityDescImpUse *lvidiu;
2266 u64 id = huge_encode_dev(sb->s_bdev->bd_dev); 2286 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
2267 2287
2268 if (sbi->s_lvid_bh != NULL) 2288 lvidiu = udf_sb_lvidiu(sb);
2269 lvidiu = udf_sb_lvidiu(sbi);
2270 else
2271 lvidiu = NULL;
2272
2273 buf->f_type = UDF_SUPER_MAGIC; 2289 buf->f_type = UDF_SUPER_MAGIC;
2274 buf->f_bsize = sb->s_blocksize; 2290 buf->f_bsize = sb->s_blocksize;
2275 buf->f_blocks = sbi->s_partmaps[sbi->s_partition].s_partition_len; 2291 buf->f_blocks = sbi->s_partmaps[sbi->s_partition].s_partition_len;
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index ed401e94aa8c..1f32c7bd9f57 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -162,7 +162,7 @@ static inline struct udf_sb_info *UDF_SB(struct super_block *sb)
162 return sb->s_fs_info; 162 return sb->s_fs_info;
163} 163}
164 164
165struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi); 165struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb);
166 166
167int udf_compute_nr_groups(struct super_block *sb, u32 partition); 167int udf_compute_nr_groups(struct super_block *sb, u32 partition);
168 168
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 88c5ea75ebf6..f1d85cfc0a54 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -628,6 +628,7 @@ xfs_buf_item_unlock(
628 else if (aborted) { 628 else if (aborted) {
629 ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); 629 ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
630 if (lip->li_flags & XFS_LI_IN_AIL) { 630 if (lip->li_flags & XFS_LI_IN_AIL) {
631 spin_lock(&lip->li_ailp->xa_lock);
631 xfs_trans_ail_delete(lip->li_ailp, lip, 632 xfs_trans_ail_delete(lip->li_ailp, lip,
632 SHUTDOWN_LOG_IO_ERROR); 633 SHUTDOWN_LOG_IO_ERROR);
633 } 634 }
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 069537c845e5..20bf8e8002d6 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1224,6 +1224,7 @@ xfs_da3_node_toosmall(
1224 /* start with smaller blk num */ 1224 /* start with smaller blk num */
1225 forward = nodehdr.forw < nodehdr.back; 1225 forward = nodehdr.forw < nodehdr.back;
1226 for (i = 0; i < 2; forward = !forward, i++) { 1226 for (i = 0; i < 2; forward = !forward, i++) {
1227 struct xfs_da3_icnode_hdr thdr;
1227 if (forward) 1228 if (forward)
1228 blkno = nodehdr.forw; 1229 blkno = nodehdr.forw;
1229 else 1230 else
@@ -1236,10 +1237,10 @@ xfs_da3_node_toosmall(
1236 return(error); 1237 return(error);
1237 1238
1238 node = bp->b_addr; 1239 node = bp->b_addr;
1239 xfs_da3_node_hdr_from_disk(&nodehdr, node); 1240 xfs_da3_node_hdr_from_disk(&thdr, node);
1240 xfs_trans_brelse(state->args->trans, bp); 1241 xfs_trans_brelse(state->args->trans, bp);
1241 1242
1242 if (count - nodehdr.count >= 0) 1243 if (count - thdr.count >= 0)
1243 break; /* fits with at least 25% to spare */ 1244 break; /* fits with at least 25% to spare */
1244 } 1245 }
1245 if (i >= 2) { 1246 if (i >= 2) {
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 0957aa98b6c0..12dad188939d 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -1158,7 +1158,7 @@ xfs_dir2_sf_to_block(
1158 /* 1158 /*
1159 * Create entry for . 1159 * Create entry for .
1160 */ 1160 */
1161 dep = xfs_dir3_data_dot_entry_p(hdr); 1161 dep = xfs_dir3_data_dot_entry_p(mp, hdr);
1162 dep->inumber = cpu_to_be64(dp->i_ino); 1162 dep->inumber = cpu_to_be64(dp->i_ino);
1163 dep->namelen = 1; 1163 dep->namelen = 1;
1164 dep->name[0] = '.'; 1164 dep->name[0] = '.';
@@ -1172,7 +1172,7 @@ xfs_dir2_sf_to_block(
1172 /* 1172 /*
1173 * Create entry for .. 1173 * Create entry for ..
1174 */ 1174 */
1175 dep = xfs_dir3_data_dotdot_entry_p(hdr); 1175 dep = xfs_dir3_data_dotdot_entry_p(mp, hdr);
1176 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp)); 1176 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
1177 dep->namelen = 2; 1177 dep->namelen = 2;
1178 dep->name[0] = dep->name[1] = '.'; 1178 dep->name[0] = dep->name[1] = '.';
@@ -1183,7 +1183,7 @@ xfs_dir2_sf_to_block(
1183 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); 1183 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
1184 blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1184 blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1185 (char *)dep - (char *)hdr)); 1185 (char *)dep - (char *)hdr));
1186 offset = xfs_dir3_data_first_offset(hdr); 1186 offset = xfs_dir3_data_first_offset(mp);
1187 /* 1187 /*
1188 * Loop over existing entries, stuff them in. 1188 * Loop over existing entries, stuff them in.
1189 */ 1189 */
diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h
index a0961a61ac1a..9cf67381adf6 100644
--- a/fs/xfs/xfs_dir2_format.h
+++ b/fs/xfs/xfs_dir2_format.h
@@ -497,69 +497,58 @@ xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr)
497/* 497/*
498 * Offsets of . and .. in data space (always block 0) 498 * Offsets of . and .. in data space (always block 0)
499 * 499 *
500 * The macros are used for shortform directories as they have no headers to read
501 * the magic number out of. Shortform directories need to know the size of the
502 * data block header because the sfe embeds the block offset of the entry into
503 * it so that it doesn't change when format conversion occurs. Bad Things Happen
504 * if we don't follow this rule.
505 *
506 * XXX: there is scope for significant optimisation of the logic here. Right 500 * XXX: there is scope for significant optimisation of the logic here. Right
507 * now we are checking for "dir3 format" over and over again. Ideally we should 501 * now we are checking for "dir3 format" over and over again. Ideally we should
508 * only do it once for each operation. 502 * only do it once for each operation.
509 */ 503 */
510#define XFS_DIR3_DATA_DOT_OFFSET(mp) \
511 xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&(mp)->m_sb))
512#define XFS_DIR3_DATA_DOTDOT_OFFSET(mp) \
513 (XFS_DIR3_DATA_DOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 1))
514#define XFS_DIR3_DATA_FIRST_OFFSET(mp) \
515 (XFS_DIR3_DATA_DOTDOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 2))
516
517static inline xfs_dir2_data_aoff_t 504static inline xfs_dir2_data_aoff_t
518xfs_dir3_data_dot_offset(struct xfs_dir2_data_hdr *hdr) 505xfs_dir3_data_dot_offset(struct xfs_mount *mp)
519{ 506{
520 return xfs_dir3_data_entry_offset(hdr); 507 return xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb));
521} 508}
522 509
523static inline xfs_dir2_data_aoff_t 510static inline xfs_dir2_data_aoff_t
524xfs_dir3_data_dotdot_offset(struct xfs_dir2_data_hdr *hdr) 511xfs_dir3_data_dotdot_offset(struct xfs_mount *mp)
525{ 512{
526 bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || 513 return xfs_dir3_data_dot_offset(mp) +
527 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); 514 xfs_dir3_data_entsize(mp, 1);
528 return xfs_dir3_data_dot_offset(hdr) +
529 __xfs_dir3_data_entsize(dir3, 1);
530} 515}
531 516
532static inline xfs_dir2_data_aoff_t 517static inline xfs_dir2_data_aoff_t
533xfs_dir3_data_first_offset(struct xfs_dir2_data_hdr *hdr) 518xfs_dir3_data_first_offset(struct xfs_mount *mp)
534{ 519{
535 bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || 520 return xfs_dir3_data_dotdot_offset(mp) +
536 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); 521 xfs_dir3_data_entsize(mp, 2);
537 return xfs_dir3_data_dotdot_offset(hdr) +
538 __xfs_dir3_data_entsize(dir3, 2);
539} 522}
540 523
541/* 524/*
542 * location of . and .. in data space (always block 0) 525 * location of . and .. in data space (always block 0)
543 */ 526 */
544static inline struct xfs_dir2_data_entry * 527static inline struct xfs_dir2_data_entry *
545xfs_dir3_data_dot_entry_p(struct xfs_dir2_data_hdr *hdr) 528xfs_dir3_data_dot_entry_p(
529 struct xfs_mount *mp,
530 struct xfs_dir2_data_hdr *hdr)
546{ 531{
547 return (struct xfs_dir2_data_entry *) 532 return (struct xfs_dir2_data_entry *)
548 ((char *)hdr + xfs_dir3_data_dot_offset(hdr)); 533 ((char *)hdr + xfs_dir3_data_dot_offset(mp));
549} 534}
550 535
551static inline struct xfs_dir2_data_entry * 536static inline struct xfs_dir2_data_entry *
552xfs_dir3_data_dotdot_entry_p(struct xfs_dir2_data_hdr *hdr) 537xfs_dir3_data_dotdot_entry_p(
538 struct xfs_mount *mp,
539 struct xfs_dir2_data_hdr *hdr)
553{ 540{
554 return (struct xfs_dir2_data_entry *) 541 return (struct xfs_dir2_data_entry *)
555 ((char *)hdr + xfs_dir3_data_dotdot_offset(hdr)); 542 ((char *)hdr + xfs_dir3_data_dotdot_offset(mp));
556} 543}
557 544
558static inline struct xfs_dir2_data_entry * 545static inline struct xfs_dir2_data_entry *
559xfs_dir3_data_first_entry_p(struct xfs_dir2_data_hdr *hdr) 546xfs_dir3_data_first_entry_p(
547 struct xfs_mount *mp,
548 struct xfs_dir2_data_hdr *hdr)
560{ 549{
561 return (struct xfs_dir2_data_entry *) 550 return (struct xfs_dir2_data_entry *)
562 ((char *)hdr + xfs_dir3_data_first_offset(hdr)); 551 ((char *)hdr + xfs_dir3_data_first_offset(mp));
563} 552}
564 553
565/* 554/*
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 8993ec17452c..8f84153e98a8 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -119,9 +119,9 @@ xfs_dir2_sf_getdents(
119 * mp->m_dirdatablk. 119 * mp->m_dirdatablk.
120 */ 120 */
121 dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 121 dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
122 XFS_DIR3_DATA_DOT_OFFSET(mp)); 122 xfs_dir3_data_dot_offset(mp));
123 dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 123 dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
124 XFS_DIR3_DATA_DOTDOT_OFFSET(mp)); 124 xfs_dir3_data_dotdot_offset(mp));
125 125
126 /* 126 /*
127 * Put . entry unless we're starting past it. 127 * Put . entry unless we're starting past it.
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index bb6e2848f473..3ef6d402084c 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -557,7 +557,7 @@ xfs_dir2_sf_addname_hard(
557 * to insert the new entry. 557 * to insert the new entry.
558 * If it's going to end up at the end then oldsfep will point there. 558 * If it's going to end up at the end then oldsfep will point there.
559 */ 559 */
560 for (offset = XFS_DIR3_DATA_FIRST_OFFSET(mp), 560 for (offset = xfs_dir3_data_first_offset(mp),
561 oldsfep = xfs_dir2_sf_firstentry(oldsfp), 561 oldsfep = xfs_dir2_sf_firstentry(oldsfp),
562 add_datasize = xfs_dir3_data_entsize(mp, args->namelen), 562 add_datasize = xfs_dir3_data_entsize(mp, args->namelen),
563 eof = (char *)oldsfep == &buf[old_isize]; 563 eof = (char *)oldsfep == &buf[old_isize];
@@ -640,7 +640,7 @@ xfs_dir2_sf_addname_pick(
640 640
641 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; 641 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
642 size = xfs_dir3_data_entsize(mp, args->namelen); 642 size = xfs_dir3_data_entsize(mp, args->namelen);
643 offset = XFS_DIR3_DATA_FIRST_OFFSET(mp); 643 offset = xfs_dir3_data_first_offset(mp);
644 sfep = xfs_dir2_sf_firstentry(sfp); 644 sfep = xfs_dir2_sf_firstentry(sfp);
645 holefit = 0; 645 holefit = 0;
646 /* 646 /*
@@ -713,7 +713,7 @@ xfs_dir2_sf_check(
713 mp = dp->i_mount; 713 mp = dp->i_mount;
714 714
715 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; 715 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
716 offset = XFS_DIR3_DATA_FIRST_OFFSET(mp); 716 offset = xfs_dir3_data_first_offset(mp);
717 ino = xfs_dir2_sf_get_parent_ino(sfp); 717 ino = xfs_dir2_sf_get_parent_ino(sfp);
718 i8count = ino > XFS_DIR2_MAX_SHORT_INUM; 718 i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
719 719
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 71520e6e5d65..1ee776d477c3 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -64,7 +64,8 @@ int xfs_dqerror_mod = 33;
64struct kmem_zone *xfs_qm_dqtrxzone; 64struct kmem_zone *xfs_qm_dqtrxzone;
65static struct kmem_zone *xfs_qm_dqzone; 65static struct kmem_zone *xfs_qm_dqzone;
66 66
67static struct lock_class_key xfs_dquot_other_class; 67static struct lock_class_key xfs_dquot_group_class;
68static struct lock_class_key xfs_dquot_project_class;
68 69
69/* 70/*
70 * This is called to free all the memory associated with a dquot 71 * This is called to free all the memory associated with a dquot
@@ -703,8 +704,20 @@ xfs_qm_dqread(
703 * Make sure group quotas have a different lock class than user 704 * Make sure group quotas have a different lock class than user
704 * quotas. 705 * quotas.
705 */ 706 */
706 if (!(type & XFS_DQ_USER)) 707 switch (type) {
707 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); 708 case XFS_DQ_USER:
709 /* uses the default lock class */
710 break;
711 case XFS_DQ_GROUP:
712 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class);
713 break;
714 case XFS_DQ_PROJ:
715 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class);
716 break;
717 default:
718 ASSERT(0);
719 break;
720 }
708 721
709 XFS_STATS_INC(xs_qm_dquot); 722 XFS_STATS_INC(xs_qm_dquot);
710 723
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 1edb5cc3e5f4..18272c766a50 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -515,7 +515,7 @@ typedef struct xfs_swapext
515/* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ 515/* XFS_IOC_GETBIOSIZE ---- deprecated 47 */
516#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) 516#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap)
517#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) 517#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
518#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_eofblocks) 518#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks)
519 519
520/* 520/*
521 * ioctl commands that replace IRIX syssgi()'s 521 * ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 193206ba4358..474807a401c8 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -119,11 +119,6 @@ xfs_inode_free(
119 ip->i_itemp = NULL; 119 ip->i_itemp = NULL;
120 } 120 }
121 121
122 /* asserts to verify all state is correct here */
123 ASSERT(atomic_read(&ip->i_pincount) == 0);
124 ASSERT(!spin_is_locked(&ip->i_flags_lock));
125 ASSERT(!xfs_isiflocked(ip));
126
127 /* 122 /*
128 * Because we use RCU freeing we need to ensure the inode always 123 * Because we use RCU freeing we need to ensure the inode always
129 * appears to be reclaimed with an invalid inode number when in the 124 * appears to be reclaimed with an invalid inode number when in the
@@ -135,6 +130,10 @@ xfs_inode_free(
135 ip->i_ino = 0; 130 ip->i_ino = 0;
136 spin_unlock(&ip->i_flags_lock); 131 spin_unlock(&ip->i_flags_lock);
137 132
133 /* asserts to verify all state is correct here */
134 ASSERT(atomic_read(&ip->i_pincount) == 0);
135 ASSERT(!xfs_isiflocked(ip));
136
138 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); 137 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
139} 138}
140 139
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index dabda9521b4b..39797490a1f1 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1585,6 +1585,7 @@ xlog_recover_add_to_trans(
1585 "bad number of regions (%d) in inode log format", 1585 "bad number of regions (%d) in inode log format",
1586 in_f->ilf_size); 1586 in_f->ilf_size);
1587 ASSERT(0); 1587 ASSERT(0);
1588 kmem_free(ptr);
1588 return XFS_ERROR(EIO); 1589 return XFS_ERROR(EIO);
1589 } 1590 }
1590 1591
@@ -1970,6 +1971,13 @@ xlog_recover_do_inode_buffer(
1970 * magic number. If we don't recognise the magic number in the buffer, then 1971 * magic number. If we don't recognise the magic number in the buffer, then
1971 * return a LSN of -1 so that the caller knows it was an unrecognised block and 1972 * return a LSN of -1 so that the caller knows it was an unrecognised block and
1972 * so can recover the buffer. 1973 * so can recover the buffer.
1974 *
1975 * Note: we cannot rely solely on magic number matches to determine that the
1976 * buffer has a valid LSN - we also need to verify that it belongs to this
1977 * filesystem, so we need to extract the object's UUID and compare it to that
1978 * which we read from the superblock. If the UUIDs don't match, then we've got a
1979 * stale metadata block from an old filesystem instance that we need to recover
1980 * over the top of.
1973 */ 1981 */
1974static xfs_lsn_t 1982static xfs_lsn_t
1975xlog_recover_get_buf_lsn( 1983xlog_recover_get_buf_lsn(
@@ -1980,6 +1988,8 @@ xlog_recover_get_buf_lsn(
1980 __uint16_t magic16; 1988 __uint16_t magic16;
1981 __uint16_t magicda; 1989 __uint16_t magicda;
1982 void *blk = bp->b_addr; 1990 void *blk = bp->b_addr;
1991 uuid_t *uuid;
1992 xfs_lsn_t lsn = -1;
1983 1993
1984 /* v4 filesystems always recover immediately */ 1994 /* v4 filesystems always recover immediately */
1985 if (!xfs_sb_version_hascrc(&mp->m_sb)) 1995 if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -1992,43 +2002,79 @@ xlog_recover_get_buf_lsn(
1992 case XFS_ABTB_MAGIC: 2002 case XFS_ABTB_MAGIC:
1993 case XFS_ABTC_MAGIC: 2003 case XFS_ABTC_MAGIC:
1994 case XFS_IBT_CRC_MAGIC: 2004 case XFS_IBT_CRC_MAGIC:
1995 case XFS_IBT_MAGIC: 2005 case XFS_IBT_MAGIC: {
1996 return be64_to_cpu( 2006 struct xfs_btree_block *btb = blk;
1997 ((struct xfs_btree_block *)blk)->bb_u.s.bb_lsn); 2007
2008 lsn = be64_to_cpu(btb->bb_u.s.bb_lsn);
2009 uuid = &btb->bb_u.s.bb_uuid;
2010 break;
2011 }
1998 case XFS_BMAP_CRC_MAGIC: 2012 case XFS_BMAP_CRC_MAGIC:
1999 case XFS_BMAP_MAGIC: 2013 case XFS_BMAP_MAGIC: {
2000 return be64_to_cpu( 2014 struct xfs_btree_block *btb = blk;
2001 ((struct xfs_btree_block *)blk)->bb_u.l.bb_lsn); 2015
2016 lsn = be64_to_cpu(btb->bb_u.l.bb_lsn);
2017 uuid = &btb->bb_u.l.bb_uuid;
2018 break;
2019 }
2002 case XFS_AGF_MAGIC: 2020 case XFS_AGF_MAGIC:
2003 return be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn); 2021 lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
2022 uuid = &((struct xfs_agf *)blk)->agf_uuid;
2023 break;
2004 case XFS_AGFL_MAGIC: 2024 case XFS_AGFL_MAGIC:
2005 return be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn); 2025 lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
2026 uuid = &((struct xfs_agfl *)blk)->agfl_uuid;
2027 break;
2006 case XFS_AGI_MAGIC: 2028 case XFS_AGI_MAGIC:
2007 return be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn); 2029 lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
2030 uuid = &((struct xfs_agi *)blk)->agi_uuid;
2031 break;
2008 case XFS_SYMLINK_MAGIC: 2032 case XFS_SYMLINK_MAGIC:
2009 return be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn); 2033 lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
2034 uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid;
2035 break;
2010 case XFS_DIR3_BLOCK_MAGIC: 2036 case XFS_DIR3_BLOCK_MAGIC:
2011 case XFS_DIR3_DATA_MAGIC: 2037 case XFS_DIR3_DATA_MAGIC:
2012 case XFS_DIR3_FREE_MAGIC: 2038 case XFS_DIR3_FREE_MAGIC:
2013 return be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn); 2039 lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
2040 uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
2041 break;
2014 case XFS_ATTR3_RMT_MAGIC: 2042 case XFS_ATTR3_RMT_MAGIC:
2015 return be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn); 2043 lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
2044 uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid;
2045 break;
2016 case XFS_SB_MAGIC: 2046 case XFS_SB_MAGIC:
2017 return be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); 2047 lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
2048 uuid = &((struct xfs_dsb *)blk)->sb_uuid;
2049 break;
2018 default: 2050 default:
2019 break; 2051 break;
2020 } 2052 }
2021 2053
2054 if (lsn != (xfs_lsn_t)-1) {
2055 if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
2056 goto recover_immediately;
2057 return lsn;
2058 }
2059
2022 magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic); 2060 magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
2023 switch (magicda) { 2061 switch (magicda) {
2024 case XFS_DIR3_LEAF1_MAGIC: 2062 case XFS_DIR3_LEAF1_MAGIC:
2025 case XFS_DIR3_LEAFN_MAGIC: 2063 case XFS_DIR3_LEAFN_MAGIC:
2026 case XFS_DA3_NODE_MAGIC: 2064 case XFS_DA3_NODE_MAGIC:
2027 return be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn); 2065 lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
2066 uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
2067 break;
2028 default: 2068 default:
2029 break; 2069 break;
2030 } 2070 }
2031 2071
2072 if (lsn != (xfs_lsn_t)-1) {
2073 if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
2074 goto recover_immediately;
2075 return lsn;
2076 }
2077
2032 /* 2078 /*
2033 * We do individual object checks on dquot and inode buffers as they 2079 * We do individual object checks on dquot and inode buffers as they
2034 * have their own individual LSN records. Also, we could have a stale 2080 * have their own individual LSN records. Also, we could have a stale