diff options
Diffstat (limited to 'fs')
323 files changed, 12718 insertions, 5323 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 6894b085f0ee..620d93489539 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c | |||
@@ -335,7 +335,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
335 | } | 335 | } |
336 | init_rwsem(&v9ses->rename_sem); | 336 | init_rwsem(&v9ses->rename_sem); |
337 | 337 | ||
338 | rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY); | 338 | rc = bdi_setup_and_register(&v9ses->bdi, "9p"); |
339 | if (rc) { | 339 | if (rc) { |
340 | kfree(v9ses->aname); | 340 | kfree(v9ses->aname); |
341 | kfree(v9ses->uname); | 341 | kfree(v9ses->uname); |
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 5594505e6e73..b40133796b87 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c | |||
@@ -831,7 +831,6 @@ static const struct vm_operations_struct v9fs_file_vm_ops = { | |||
831 | .fault = filemap_fault, | 831 | .fault = filemap_fault, |
832 | .map_pages = filemap_map_pages, | 832 | .map_pages = filemap_map_pages, |
833 | .page_mkwrite = v9fs_vm_page_mkwrite, | 833 | .page_mkwrite = v9fs_vm_page_mkwrite, |
834 | .remap_pages = generic_file_remap_pages, | ||
835 | }; | 834 | }; |
836 | 835 | ||
837 | static const struct vm_operations_struct v9fs_mmap_file_vm_ops = { | 836 | static const struct vm_operations_struct v9fs_mmap_file_vm_ops = { |
@@ -839,7 +838,6 @@ static const struct vm_operations_struct v9fs_mmap_file_vm_ops = { | |||
839 | .fault = filemap_fault, | 838 | .fault = filemap_fault, |
840 | .map_pages = filemap_map_pages, | 839 | .map_pages = filemap_map_pages, |
841 | .page_mkwrite = v9fs_vm_page_mkwrite, | 840 | .page_mkwrite = v9fs_vm_page_mkwrite, |
842 | .remap_pages = generic_file_remap_pages, | ||
843 | }; | 841 | }; |
844 | 842 | ||
845 | 843 | ||
diff --git a/fs/Kconfig b/fs/Kconfig index 664991afe0c0..ec35851e5b71 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -13,13 +13,6 @@ if BLOCK | |||
13 | source "fs/ext2/Kconfig" | 13 | source "fs/ext2/Kconfig" |
14 | source "fs/ext3/Kconfig" | 14 | source "fs/ext3/Kconfig" |
15 | source "fs/ext4/Kconfig" | 15 | source "fs/ext4/Kconfig" |
16 | |||
17 | config FS_XIP | ||
18 | # execute in place | ||
19 | bool | ||
20 | depends on EXT2_FS_XIP | ||
21 | default y | ||
22 | |||
23 | source "fs/jbd/Kconfig" | 16 | source "fs/jbd/Kconfig" |
24 | source "fs/jbd2/Kconfig" | 17 | source "fs/jbd2/Kconfig" |
25 | 18 | ||
@@ -40,6 +33,21 @@ source "fs/ocfs2/Kconfig" | |||
40 | source "fs/btrfs/Kconfig" | 33 | source "fs/btrfs/Kconfig" |
41 | source "fs/nilfs2/Kconfig" | 34 | source "fs/nilfs2/Kconfig" |
42 | 35 | ||
36 | config FS_DAX | ||
37 | bool "Direct Access (DAX) support" | ||
38 | depends on MMU | ||
39 | depends on !(ARM || MIPS || SPARC) | ||
40 | help | ||
41 | Direct Access (DAX) can be used on memory-backed block devices. | ||
42 | If the block device supports DAX and the filesystem supports DAX, | ||
43 | then you can avoid using the pagecache to buffer I/Os. Turning | ||
44 | on this option will compile in support for DAX; you will need to | ||
45 | mount the filesystem using the -o dax option. | ||
46 | |||
47 | If you do not have a block device that is capable of using this, | ||
48 | or if unsure, say N. Saying Y will increase the size of the kernel | ||
49 | by about 5kB. | ||
50 | |||
43 | endif # BLOCK | 51 | endif # BLOCK |
44 | 52 | ||
45 | # Posix ACL utility routines | 53 | # Posix ACL utility routines |
@@ -165,6 +173,7 @@ config HUGETLB_PAGE | |||
165 | def_bool HUGETLBFS | 173 | def_bool HUGETLBFS |
166 | 174 | ||
167 | source "fs/configfs/Kconfig" | 175 | source "fs/configfs/Kconfig" |
176 | source "fs/efivarfs/Kconfig" | ||
168 | 177 | ||
169 | endmenu | 178 | endmenu |
170 | 179 | ||
@@ -209,7 +218,6 @@ source "fs/sysv/Kconfig" | |||
209 | source "fs/ufs/Kconfig" | 218 | source "fs/ufs/Kconfig" |
210 | source "fs/exofs/Kconfig" | 219 | source "fs/exofs/Kconfig" |
211 | source "fs/f2fs/Kconfig" | 220 | source "fs/f2fs/Kconfig" |
212 | source "fs/efivarfs/Kconfig" | ||
213 | 221 | ||
214 | endif # MISC_FILESYSTEMS | 222 | endif # MISC_FILESYSTEMS |
215 | 223 | ||
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index c055d56ec63d..270c48148f79 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt | |||
@@ -149,13 +149,6 @@ config BINFMT_EM86 | |||
149 | later load the module when you want to use a Linux/Intel binary. The | 149 | later load the module when you want to use a Linux/Intel binary. The |
150 | module will be called binfmt_em86. If unsure, say Y. | 150 | module will be called binfmt_em86. If unsure, say Y. |
151 | 151 | ||
152 | config BINFMT_SOM | ||
153 | tristate "Kernel support for SOM binaries" | ||
154 | depends on PARISC && HPUX | ||
155 | help | ||
156 | SOM is a binary executable format inherited from HP/UX. Say | ||
157 | Y here to be able to load and execute SOM binaries directly. | ||
158 | |||
159 | config BINFMT_MISC | 152 | config BINFMT_MISC |
160 | tristate "Kernel support for MISC binaries" | 153 | tristate "Kernel support for MISC binaries" |
161 | ---help--- | 154 | ---help--- |
diff --git a/fs/Makefile b/fs/Makefile index bedff48e8fdc..a88ac4838c9e 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -28,6 +28,7 @@ obj-$(CONFIG_SIGNALFD) += signalfd.o | |||
28 | obj-$(CONFIG_TIMERFD) += timerfd.o | 28 | obj-$(CONFIG_TIMERFD) += timerfd.o |
29 | obj-$(CONFIG_EVENTFD) += eventfd.o | 29 | obj-$(CONFIG_EVENTFD) += eventfd.o |
30 | obj-$(CONFIG_AIO) += aio.o | 30 | obj-$(CONFIG_AIO) += aio.o |
31 | obj-$(CONFIG_FS_DAX) += dax.o | ||
31 | obj-$(CONFIG_FILE_LOCKING) += locks.o | 32 | obj-$(CONFIG_FILE_LOCKING) += locks.o |
32 | obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o | 33 | obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o |
33 | obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o | 34 | obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o |
@@ -37,7 +38,6 @@ obj-$(CONFIG_BINFMT_SCRIPT) += binfmt_script.o | |||
37 | obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o | 38 | obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o |
38 | obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o | 39 | obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o |
39 | obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o | 40 | obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o |
40 | obj-$(CONFIG_BINFMT_SOM) += binfmt_som.o | ||
41 | obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o | 41 | obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o |
42 | 42 | ||
43 | obj-$(CONFIG_FS_MBCACHE) += mbcache.o | 43 | obj-$(CONFIG_FS_MBCACHE) += mbcache.o |
diff --git a/fs/affs/affs.h b/fs/affs/affs.h index ff44ff3ff015..c8764bd7497d 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h | |||
@@ -30,6 +30,8 @@ | |||
30 | #define AFFS_AC_SIZE (AFFS_CACHE_SIZE/sizeof(struct affs_ext_key)/2) | 30 | #define AFFS_AC_SIZE (AFFS_CACHE_SIZE/sizeof(struct affs_ext_key)/2) |
31 | #define AFFS_AC_MASK (AFFS_AC_SIZE-1) | 31 | #define AFFS_AC_MASK (AFFS_AC_SIZE-1) |
32 | 32 | ||
33 | #define AFFSNAMEMAX 30U | ||
34 | |||
33 | struct affs_ext_key { | 35 | struct affs_ext_key { |
34 | u32 ext; /* idx of the extended block */ | 36 | u32 ext; /* idx of the extended block */ |
35 | u32 key; /* block number */ | 37 | u32 key; /* block number */ |
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index c852f2fa1710..388da1ea815d 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c | |||
@@ -30,7 +30,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh) | |||
30 | ino = bh->b_blocknr; | 30 | ino = bh->b_blocknr; |
31 | offset = affs_hash_name(sb, AFFS_TAIL(sb, bh)->name + 1, AFFS_TAIL(sb, bh)->name[0]); | 31 | offset = affs_hash_name(sb, AFFS_TAIL(sb, bh)->name + 1, AFFS_TAIL(sb, bh)->name[0]); |
32 | 32 | ||
33 | pr_debug("%s(dir=%u, ino=%d)\n", __func__, (u32)dir->i_ino, ino); | 33 | pr_debug("%s(dir=%lu, ino=%d)\n", __func__, dir->i_ino, ino); |
34 | 34 | ||
35 | dir_bh = affs_bread(sb, dir->i_ino); | 35 | dir_bh = affs_bread(sb, dir->i_ino); |
36 | if (!dir_bh) | 36 | if (!dir_bh) |
@@ -80,8 +80,8 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh) | |||
80 | sb = dir->i_sb; | 80 | sb = dir->i_sb; |
81 | rem_ino = rem_bh->b_blocknr; | 81 | rem_ino = rem_bh->b_blocknr; |
82 | offset = affs_hash_name(sb, AFFS_TAIL(sb, rem_bh)->name+1, AFFS_TAIL(sb, rem_bh)->name[0]); | 82 | offset = affs_hash_name(sb, AFFS_TAIL(sb, rem_bh)->name+1, AFFS_TAIL(sb, rem_bh)->name[0]); |
83 | pr_debug("%s(dir=%d, ino=%d, hashval=%d)\n", | 83 | pr_debug("%s(dir=%lu, ino=%d, hashval=%d)\n", __func__, dir->i_ino, |
84 | __func__, (u32)dir->i_ino, rem_ino, offset); | 84 | rem_ino, offset); |
85 | 85 | ||
86 | bh = affs_bread(sb, dir->i_ino); | 86 | bh = affs_bread(sb, dir->i_ino); |
87 | if (!bh) | 87 | if (!bh) |
@@ -483,11 +483,10 @@ affs_check_name(const unsigned char *name, int len, bool notruncate) | |||
483 | { | 483 | { |
484 | int i; | 484 | int i; |
485 | 485 | ||
486 | if (len > 30) { | 486 | if (len > AFFSNAMEMAX) { |
487 | if (notruncate) | 487 | if (notruncate) |
488 | return -ENAMETOOLONG; | 488 | return -ENAMETOOLONG; |
489 | else | 489 | len = AFFSNAMEMAX; |
490 | len = 30; | ||
491 | } | 490 | } |
492 | for (i = 0; i < len; i++) { | 491 | for (i = 0; i < len; i++) { |
493 | if (name[i] < ' ' || name[i] == ':' | 492 | if (name[i] < ' ' || name[i] == ':' |
@@ -508,7 +507,7 @@ affs_check_name(const unsigned char *name, int len, bool notruncate) | |||
508 | int | 507 | int |
509 | affs_copy_name(unsigned char *bstr, struct dentry *dentry) | 508 | affs_copy_name(unsigned char *bstr, struct dentry *dentry) |
510 | { | 509 | { |
511 | int len = min(dentry->d_name.len, 30u); | 510 | u32 len = min(dentry->d_name.len, AFFSNAMEMAX); |
512 | 511 | ||
513 | *bstr++ = len; | 512 | *bstr++ = len; |
514 | memcpy(bstr, dentry->d_name.name, len); | 513 | memcpy(bstr, dentry->d_name.name, len); |
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index c8de51185c23..675148950fed 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c | |||
@@ -99,7 +99,6 @@ err_bh_read: | |||
99 | 99 | ||
100 | err_range: | 100 | err_range: |
101 | affs_error(sb, "affs_free_block","Block %u outside partition", block); | 101 | affs_error(sb, "affs_free_block","Block %u outside partition", block); |
102 | return; | ||
103 | } | 102 | } |
104 | 103 | ||
105 | /* | 104 | /* |
diff --git a/fs/affs/dir.c b/fs/affs/dir.c index 59f07bec92a6..ac4f318aafba 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c | |||
@@ -54,8 +54,7 @@ affs_readdir(struct file *file, struct dir_context *ctx) | |||
54 | u32 ino; | 54 | u32 ino; |
55 | int error = 0; | 55 | int error = 0; |
56 | 56 | ||
57 | pr_debug("%s(ino=%lu,f_pos=%lx)\n", | 57 | pr_debug("%s(ino=%lu,f_pos=%llx)\n", __func__, inode->i_ino, ctx->pos); |
58 | __func__, inode->i_ino, (unsigned long)ctx->pos); | ||
59 | 58 | ||
60 | if (ctx->pos < 2) { | 59 | if (ctx->pos < 2) { |
61 | file->private_data = (void *)0; | 60 | file->private_data = (void *)0; |
@@ -115,11 +114,11 @@ inside: | |||
115 | break; | 114 | break; |
116 | } | 115 | } |
117 | 116 | ||
118 | namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30); | 117 | namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], |
118 | (u8)AFFSNAMEMAX); | ||
119 | name = AFFS_TAIL(sb, fh_bh)->name + 1; | 119 | name = AFFS_TAIL(sb, fh_bh)->name + 1; |
120 | pr_debug("readdir(): dir_emit(\"%.*s\", " | 120 | pr_debug("readdir(): dir_emit(\"%.*s\", ino=%u), hash=%d, f_pos=%llx\n", |
121 | "ino=%u), hash=%d, f_pos=%x\n", | 121 | namelen, name, ino, hash_pos, ctx->pos); |
122 | namelen, name, ino, hash_pos, (u32)ctx->pos); | ||
123 | 122 | ||
124 | if (!dir_emit(ctx, name, namelen, ino, DT_UNKNOWN)) | 123 | if (!dir_emit(ctx, name, namelen, ino, DT_UNKNOWN)) |
125 | goto done; | 124 | goto done; |
diff --git a/fs/affs/file.c b/fs/affs/file.c index 8faa6593ca6d..d2468bf95669 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c | |||
@@ -180,8 +180,7 @@ affs_get_extblock_slow(struct inode *inode, u32 ext) | |||
180 | ext_key = be32_to_cpu(AFFS_TAIL(sb, bh)->extension); | 180 | ext_key = be32_to_cpu(AFFS_TAIL(sb, bh)->extension); |
181 | if (ext < AFFS_I(inode)->i_extcnt) | 181 | if (ext < AFFS_I(inode)->i_extcnt) |
182 | goto read_ext; | 182 | goto read_ext; |
183 | if (ext > AFFS_I(inode)->i_extcnt) | 183 | BUG_ON(ext > AFFS_I(inode)->i_extcnt); |
184 | BUG(); | ||
185 | bh = affs_alloc_extblock(inode, bh, ext); | 184 | bh = affs_alloc_extblock(inode, bh, ext); |
186 | if (IS_ERR(bh)) | 185 | if (IS_ERR(bh)) |
187 | return bh; | 186 | return bh; |
@@ -198,8 +197,7 @@ affs_get_extblock_slow(struct inode *inode, u32 ext) | |||
198 | struct buffer_head *prev_bh; | 197 | struct buffer_head *prev_bh; |
199 | 198 | ||
200 | /* allocate a new extended block */ | 199 | /* allocate a new extended block */ |
201 | if (ext > AFFS_I(inode)->i_extcnt) | 200 | BUG_ON(ext > AFFS_I(inode)->i_extcnt); |
202 | BUG(); | ||
203 | 201 | ||
204 | /* get previous extended block */ | 202 | /* get previous extended block */ |
205 | prev_bh = affs_get_extblock(inode, ext - 1); | 203 | prev_bh = affs_get_extblock(inode, ext - 1); |
@@ -299,8 +297,8 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul | |||
299 | struct buffer_head *ext_bh; | 297 | struct buffer_head *ext_bh; |
300 | u32 ext; | 298 | u32 ext; |
301 | 299 | ||
302 | pr_debug("%s(%u, %lu)\n", | 300 | pr_debug("%s(%lu, %llu)\n", __func__, inode->i_ino, |
303 | __func__, (u32)inode->i_ino, (unsigned long)block); | 301 | (unsigned long long)block); |
304 | 302 | ||
305 | BUG_ON(block > (sector_t)0x7fffffffUL); | 303 | BUG_ON(block > (sector_t)0x7fffffffUL); |
306 | 304 | ||
@@ -330,8 +328,9 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul | |||
330 | 328 | ||
331 | /* store new block */ | 329 | /* store new block */ |
332 | if (bh_result->b_blocknr) | 330 | if (bh_result->b_blocknr) |
333 | affs_warning(sb, "get_block", "block already set (%lx)", | 331 | affs_warning(sb, "get_block", |
334 | (unsigned long)bh_result->b_blocknr); | 332 | "block already set (%llx)", |
333 | (unsigned long long)bh_result->b_blocknr); | ||
335 | AFFS_BLOCK(sb, ext_bh, block) = cpu_to_be32(blocknr); | 334 | AFFS_BLOCK(sb, ext_bh, block) = cpu_to_be32(blocknr); |
336 | AFFS_HEAD(ext_bh)->block_count = cpu_to_be32(block + 1); | 335 | AFFS_HEAD(ext_bh)->block_count = cpu_to_be32(block + 1); |
337 | affs_adjust_checksum(ext_bh, blocknr - bh_result->b_blocknr + 1); | 336 | affs_adjust_checksum(ext_bh, blocknr - bh_result->b_blocknr + 1); |
@@ -353,8 +352,8 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul | |||
353 | return 0; | 352 | return 0; |
354 | 353 | ||
355 | err_big: | 354 | err_big: |
356 | affs_error(inode->i_sb, "get_block", "strange block request %d", | 355 | affs_error(inode->i_sb, "get_block", "strange block request %llu", |
357 | (int)block); | 356 | (unsigned long long)block); |
358 | return -EIO; | 357 | return -EIO; |
359 | err_ext: | 358 | err_ext: |
360 | // unlock cache | 359 | // unlock cache |
@@ -399,6 +398,13 @@ affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, | |||
399 | size_t count = iov_iter_count(iter); | 398 | size_t count = iov_iter_count(iter); |
400 | ssize_t ret; | 399 | ssize_t ret; |
401 | 400 | ||
401 | if (rw == WRITE) { | ||
402 | loff_t size = offset + count; | ||
403 | |||
404 | if (AFFS_I(inode)->mmu_private < size) | ||
405 | return 0; | ||
406 | } | ||
407 | |||
402 | ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, affs_get_block); | 408 | ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, affs_get_block); |
403 | if (ret < 0 && (rw & WRITE)) | 409 | if (ret < 0 && (rw & WRITE)) |
404 | affs_write_failed(mapping, offset + count); | 410 | affs_write_failed(mapping, offset + count); |
@@ -503,7 +509,7 @@ affs_do_readpage_ofs(struct page *page, unsigned to) | |||
503 | u32 bidx, boff, bsize; | 509 | u32 bidx, boff, bsize; |
504 | u32 tmp; | 510 | u32 tmp; |
505 | 511 | ||
506 | pr_debug("%s(%u, %ld, 0, %d)\n", __func__, (u32)inode->i_ino, | 512 | pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino, |
507 | page->index, to); | 513 | page->index, to); |
508 | BUG_ON(to > PAGE_CACHE_SIZE); | 514 | BUG_ON(to > PAGE_CACHE_SIZE); |
509 | kmap(page); | 515 | kmap(page); |
@@ -539,7 +545,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) | |||
539 | u32 size, bsize; | 545 | u32 size, bsize; |
540 | u32 tmp; | 546 | u32 tmp; |
541 | 547 | ||
542 | pr_debug("%s(%u, %d)\n", __func__, (u32)inode->i_ino, newsize); | 548 | pr_debug("%s(%lu, %d)\n", __func__, inode->i_ino, newsize); |
543 | bsize = AFFS_SB(sb)->s_data_blksize; | 549 | bsize = AFFS_SB(sb)->s_data_blksize; |
544 | bh = NULL; | 550 | bh = NULL; |
545 | size = AFFS_I(inode)->mmu_private; | 551 | size = AFFS_I(inode)->mmu_private; |
@@ -608,7 +614,7 @@ affs_readpage_ofs(struct file *file, struct page *page) | |||
608 | u32 to; | 614 | u32 to; |
609 | int err; | 615 | int err; |
610 | 616 | ||
611 | pr_debug("%s(%u, %ld)\n", __func__, (u32)inode->i_ino, page->index); | 617 | pr_debug("%s(%lu, %ld)\n", __func__, inode->i_ino, page->index); |
612 | to = PAGE_CACHE_SIZE; | 618 | to = PAGE_CACHE_SIZE; |
613 | if (((page->index + 1) << PAGE_CACHE_SHIFT) > inode->i_size) { | 619 | if (((page->index + 1) << PAGE_CACHE_SHIFT) > inode->i_size) { |
614 | to = inode->i_size & ~PAGE_CACHE_MASK; | 620 | to = inode->i_size & ~PAGE_CACHE_MASK; |
@@ -631,8 +637,8 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping | |||
631 | pgoff_t index; | 637 | pgoff_t index; |
632 | int err = 0; | 638 | int err = 0; |
633 | 639 | ||
634 | pr_debug("%s(%u, %llu, %llu)\n", __func__, (u32)inode->i_ino, | 640 | pr_debug("%s(%lu, %llu, %llu)\n", __func__, inode->i_ino, pos, |
635 | (unsigned long long)pos, (unsigned long long)pos + len); | 641 | pos + len); |
636 | if (pos > AFFS_I(inode)->mmu_private) { | 642 | if (pos > AFFS_I(inode)->mmu_private) { |
637 | /* XXX: this probably leaves a too-big i_size in case of | 643 | /* XXX: this probably leaves a too-big i_size in case of |
638 | * failure. Should really be updating i_size at write_end time | 644 | * failure. Should really be updating i_size at write_end time |
@@ -681,9 +687,8 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, | |||
681 | * due to write_begin. | 687 | * due to write_begin. |
682 | */ | 688 | */ |
683 | 689 | ||
684 | pr_debug("%s(%u, %llu, %llu)\n", | 690 | pr_debug("%s(%lu, %llu, %llu)\n", __func__, inode->i_ino, pos, |
685 | __func__, (u32)inode->i_ino, (unsigned long long)pos, | 691 | pos + len); |
686 | (unsigned long long)pos + len); | ||
687 | bsize = AFFS_SB(sb)->s_data_blksize; | 692 | bsize = AFFS_SB(sb)->s_data_blksize; |
688 | data = page_address(page); | 693 | data = page_address(page); |
689 | 694 | ||
@@ -831,8 +836,8 @@ affs_truncate(struct inode *inode) | |||
831 | struct buffer_head *ext_bh; | 836 | struct buffer_head *ext_bh; |
832 | int i; | 837 | int i; |
833 | 838 | ||
834 | pr_debug("truncate(inode=%d, oldsize=%u, newsize=%u)\n", | 839 | pr_debug("truncate(inode=%lu, oldsize=%llu, newsize=%llu)\n", |
835 | (u32)inode->i_ino, (u32)AFFS_I(inode)->mmu_private, (u32)inode->i_size); | 840 | inode->i_ino, AFFS_I(inode)->mmu_private, inode->i_size); |
836 | 841 | ||
837 | last_blk = 0; | 842 | last_blk = 0; |
838 | ext = 0; | 843 | ext = 0; |
@@ -863,7 +868,7 @@ affs_truncate(struct inode *inode) | |||
863 | if (IS_ERR(ext_bh)) { | 868 | if (IS_ERR(ext_bh)) { |
864 | affs_warning(sb, "truncate", | 869 | affs_warning(sb, "truncate", |
865 | "unexpected read error for ext block %u (%ld)", | 870 | "unexpected read error for ext block %u (%ld)", |
866 | (unsigned int)ext, PTR_ERR(ext_bh)); | 871 | ext, PTR_ERR(ext_bh)); |
867 | return; | 872 | return; |
868 | } | 873 | } |
869 | if (AFFS_I(inode)->i_lc) { | 874 | if (AFFS_I(inode)->i_lc) { |
@@ -911,7 +916,7 @@ affs_truncate(struct inode *inode) | |||
911 | if (IS_ERR(bh)) { | 916 | if (IS_ERR(bh)) { |
912 | affs_warning(sb, "truncate", | 917 | affs_warning(sb, "truncate", |
913 | "unexpected read error for last block %u (%ld)", | 918 | "unexpected read error for last block %u (%ld)", |
914 | (unsigned int)ext, PTR_ERR(bh)); | 919 | ext, PTR_ERR(bh)); |
915 | return; | 920 | return; |
916 | } | 921 | } |
917 | tmp = be32_to_cpu(AFFS_DATA_HEAD(bh)->next); | 922 | tmp = be32_to_cpu(AFFS_DATA_HEAD(bh)->next); |
diff --git a/fs/affs/inode.c b/fs/affs/inode.c index d0609a282e1d..6f34510449e8 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c | |||
@@ -13,8 +13,6 @@ | |||
13 | #include <linux/gfp.h> | 13 | #include <linux/gfp.h> |
14 | #include "affs.h" | 14 | #include "affs.h" |
15 | 15 | ||
16 | extern const struct inode_operations affs_symlink_inode_operations; | ||
17 | |||
18 | struct inode *affs_iget(struct super_block *sb, unsigned long ino) | 16 | struct inode *affs_iget(struct super_block *sb, unsigned long ino) |
19 | { | 17 | { |
20 | struct affs_sb_info *sbi = AFFS_SB(sb); | 18 | struct affs_sb_info *sbi = AFFS_SB(sb); |
@@ -348,9 +346,8 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3 | |||
348 | u32 block = 0; | 346 | u32 block = 0; |
349 | int retval; | 347 | int retval; |
350 | 348 | ||
351 | pr_debug("%s(dir=%u, inode=%u, \"%pd\", type=%d)\n", | 349 | pr_debug("%s(dir=%lu, inode=%lu, \"%pd\", type=%d)\n", __func__, |
352 | __func__, (u32)dir->i_ino, | 350 | dir->i_ino, inode->i_ino, dentry, type); |
353 | (u32)inode->i_ino, dentry, type); | ||
354 | 351 | ||
355 | retval = -EIO; | 352 | retval = -EIO; |
356 | bh = affs_bread(sb, inode->i_ino); | 353 | bh = affs_bread(sb, inode->i_ino); |
diff --git a/fs/affs/namei.c b/fs/affs/namei.c index bbc38530e924..ffb7bd82c2a5 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c | |||
@@ -64,15 +64,16 @@ __affs_hash_dentry(struct qstr *qstr, toupper_t toupper, bool notruncate) | |||
64 | { | 64 | { |
65 | const u8 *name = qstr->name; | 65 | const u8 *name = qstr->name; |
66 | unsigned long hash; | 66 | unsigned long hash; |
67 | int i; | 67 | int retval; |
68 | u32 len; | ||
68 | 69 | ||
69 | i = affs_check_name(qstr->name, qstr->len, notruncate); | 70 | retval = affs_check_name(qstr->name, qstr->len, notruncate); |
70 | if (i) | 71 | if (retval) |
71 | return i; | 72 | return retval; |
72 | 73 | ||
73 | hash = init_name_hash(); | 74 | hash = init_name_hash(); |
74 | i = min(qstr->len, 30u); | 75 | len = min(qstr->len, AFFSNAMEMAX); |
75 | for (; i > 0; name++, i--) | 76 | for (; len > 0; name++, len--) |
76 | hash = partial_name_hash(toupper(*name), hash); | 77 | hash = partial_name_hash(toupper(*name), hash); |
77 | qstr->hash = end_name_hash(hash); | 78 | qstr->hash = end_name_hash(hash); |
78 | 79 | ||
@@ -114,10 +115,10 @@ static inline int __affs_compare_dentry(unsigned int len, | |||
114 | * If the names are longer than the allowed 30 chars, | 115 | * If the names are longer than the allowed 30 chars, |
115 | * the excess is ignored, so their length may differ. | 116 | * the excess is ignored, so their length may differ. |
116 | */ | 117 | */ |
117 | if (len >= 30) { | 118 | if (len >= AFFSNAMEMAX) { |
118 | if (name->len < 30) | 119 | if (name->len < AFFSNAMEMAX) |
119 | return 1; | 120 | return 1; |
120 | len = 30; | 121 | len = AFFSNAMEMAX; |
121 | } else if (len != name->len) | 122 | } else if (len != name->len) |
122 | return 1; | 123 | return 1; |
123 | 124 | ||
@@ -156,10 +157,10 @@ affs_match(struct dentry *dentry, const u8 *name2, toupper_t toupper) | |||
156 | const u8 *name = dentry->d_name.name; | 157 | const u8 *name = dentry->d_name.name; |
157 | int len = dentry->d_name.len; | 158 | int len = dentry->d_name.len; |
158 | 159 | ||
159 | if (len >= 30) { | 160 | if (len >= AFFSNAMEMAX) { |
160 | if (*name2 < 30) | 161 | if (*name2 < AFFSNAMEMAX) |
161 | return 0; | 162 | return 0; |
162 | len = 30; | 163 | len = AFFSNAMEMAX; |
163 | } else if (len != *name2) | 164 | } else if (len != *name2) |
164 | return 0; | 165 | return 0; |
165 | 166 | ||
@@ -173,9 +174,9 @@ int | |||
173 | affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len) | 174 | affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len) |
174 | { | 175 | { |
175 | toupper_t toupper = affs_get_toupper(sb); | 176 | toupper_t toupper = affs_get_toupper(sb); |
176 | int hash; | 177 | u32 hash; |
177 | 178 | ||
178 | hash = len = min(len, 30u); | 179 | hash = len = min(len, AFFSNAMEMAX); |
179 | for (; len > 0; len--) | 180 | for (; len > 0; len--) |
180 | hash = (hash * 13 + toupper(*name++)) & 0x7ff; | 181 | hash = (hash * 13 + toupper(*name++)) & 0x7ff; |
181 | 182 | ||
@@ -248,9 +249,8 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | |||
248 | int | 249 | int |
249 | affs_unlink(struct inode *dir, struct dentry *dentry) | 250 | affs_unlink(struct inode *dir, struct dentry *dentry) |
250 | { | 251 | { |
251 | pr_debug("%s(dir=%d, %lu \"%pd\")\n", | 252 | pr_debug("%s(dir=%lu, %lu \"%pd\")\n", __func__, dir->i_ino, |
252 | __func__, (u32)dir->i_ino, dentry->d_inode->i_ino, | 253 | dentry->d_inode->i_ino, dentry); |
253 | dentry); | ||
254 | 254 | ||
255 | return affs_remove_header(dentry); | 255 | return affs_remove_header(dentry); |
256 | } | 256 | } |
@@ -317,9 +317,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
317 | int | 317 | int |
318 | affs_rmdir(struct inode *dir, struct dentry *dentry) | 318 | affs_rmdir(struct inode *dir, struct dentry *dentry) |
319 | { | 319 | { |
320 | pr_debug("%s(dir=%u, %lu \"%pd\")\n", | 320 | pr_debug("%s(dir=%lu, %lu \"%pd\")\n", __func__, dir->i_ino, |
321 | __func__, (u32)dir->i_ino, dentry->d_inode->i_ino, | 321 | dentry->d_inode->i_ino, dentry); |
322 | dentry); | ||
323 | 322 | ||
324 | return affs_remove_header(dentry); | 323 | return affs_remove_header(dentry); |
325 | } | 324 | } |
@@ -404,8 +403,7 @@ affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) | |||
404 | { | 403 | { |
405 | struct inode *inode = old_dentry->d_inode; | 404 | struct inode *inode = old_dentry->d_inode; |
406 | 405 | ||
407 | pr_debug("%s(%u, %u, \"%pd\")\n", | 406 | pr_debug("%s(%lu, %lu, \"%pd\")\n", __func__, inode->i_ino, dir->i_ino, |
408 | __func__, (u32)inode->i_ino, (u32)dir->i_ino, | ||
409 | dentry); | 407 | dentry); |
410 | 408 | ||
411 | return affs_add_entry(dir, inode, dentry, ST_LINKFILE); | 409 | return affs_add_entry(dir, inode, dentry, ST_LINKFILE); |
@@ -419,9 +417,8 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
419 | struct buffer_head *bh = NULL; | 417 | struct buffer_head *bh = NULL; |
420 | int retval; | 418 | int retval; |
421 | 419 | ||
422 | pr_debug("%s(old=%u,\"%pd\" to new=%u,\"%pd\")\n", | 420 | pr_debug("%s(old=%lu,\"%pd\" to new=%lu,\"%pd\")\n", __func__, |
423 | __func__, (u32)old_dir->i_ino, old_dentry, | 421 | old_dir->i_ino, old_dentry, new_dir->i_ino, new_dentry); |
424 | (u32)new_dir->i_ino, new_dentry); | ||
425 | 422 | ||
426 | retval = affs_check_name(new_dentry->d_name.name, | 423 | retval = affs_check_name(new_dentry->d_name.name, |
427 | new_dentry->d_name.len, | 424 | new_dentry->d_name.len, |
diff --git a/fs/affs/super.c b/fs/affs/super.c index f754ab68a840..4cf0e9113fb6 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c | |||
@@ -432,39 +432,39 @@ got_root: | |||
432 | sb->s_flags |= MS_RDONLY; | 432 | sb->s_flags |= MS_RDONLY; |
433 | } | 433 | } |
434 | switch (chksum) { | 434 | switch (chksum) { |
435 | case MUFS_FS: | 435 | case MUFS_FS: |
436 | case MUFS_INTLFFS: | 436 | case MUFS_INTLFFS: |
437 | case MUFS_DCFFS: | 437 | case MUFS_DCFFS: |
438 | sbi->s_flags |= SF_MUFS; | 438 | sbi->s_flags |= SF_MUFS; |
439 | /* fall thru */ | 439 | /* fall thru */ |
440 | case FS_INTLFFS: | 440 | case FS_INTLFFS: |
441 | case FS_DCFFS: | 441 | case FS_DCFFS: |
442 | sbi->s_flags |= SF_INTL; | 442 | sbi->s_flags |= SF_INTL; |
443 | break; | 443 | break; |
444 | case MUFS_FFS: | 444 | case MUFS_FFS: |
445 | sbi->s_flags |= SF_MUFS; | 445 | sbi->s_flags |= SF_MUFS; |
446 | break; | 446 | break; |
447 | case FS_FFS: | 447 | case FS_FFS: |
448 | break; | 448 | break; |
449 | case MUFS_OFS: | 449 | case MUFS_OFS: |
450 | sbi->s_flags |= SF_MUFS; | 450 | sbi->s_flags |= SF_MUFS; |
451 | /* fall thru */ | 451 | /* fall thru */ |
452 | case FS_OFS: | 452 | case FS_OFS: |
453 | sbi->s_flags |= SF_OFS; | 453 | sbi->s_flags |= SF_OFS; |
454 | sb->s_flags |= MS_NOEXEC; | 454 | sb->s_flags |= MS_NOEXEC; |
455 | break; | 455 | break; |
456 | case MUFS_DCOFS: | 456 | case MUFS_DCOFS: |
457 | case MUFS_INTLOFS: | 457 | case MUFS_INTLOFS: |
458 | sbi->s_flags |= SF_MUFS; | 458 | sbi->s_flags |= SF_MUFS; |
459 | case FS_DCOFS: | 459 | case FS_DCOFS: |
460 | case FS_INTLOFS: | 460 | case FS_INTLOFS: |
461 | sbi->s_flags |= SF_INTL | SF_OFS; | 461 | sbi->s_flags |= SF_INTL | SF_OFS; |
462 | sb->s_flags |= MS_NOEXEC; | 462 | sb->s_flags |= MS_NOEXEC; |
463 | break; | 463 | break; |
464 | default: | 464 | default: |
465 | pr_err("Unknown filesystem on device %s: %08X\n", | 465 | pr_err("Unknown filesystem on device %s: %08X\n", |
466 | sb->s_id, chksum); | 466 | sb->s_id, chksum); |
467 | return -EINVAL; | 467 | return -EINVAL; |
468 | } | 468 | } |
469 | 469 | ||
470 | if (mount_flags & SF_VERBOSE) { | 470 | if (mount_flags & SF_VERBOSE) { |
@@ -584,7 +584,7 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
584 | buf->f_bavail = free; | 584 | buf->f_bavail = free; |
585 | buf->f_fsid.val[0] = (u32)id; | 585 | buf->f_fsid.val[0] = (u32)id; |
586 | buf->f_fsid.val[1] = (u32)(id >> 32); | 586 | buf->f_fsid.val[1] = (u32)(id >> 32); |
587 | buf->f_namelen = 30; | 587 | buf->f_namelen = AFFSNAMEMAX; |
588 | return 0; | 588 | return 0; |
589 | } | 589 | } |
590 | 590 | ||
@@ -602,6 +602,7 @@ static void affs_kill_sb(struct super_block *sb) | |||
602 | affs_free_bitmap(sb); | 602 | affs_free_bitmap(sb); |
603 | affs_brelse(sbi->s_root_bh); | 603 | affs_brelse(sbi->s_root_bh); |
604 | kfree(sbi->s_prefix); | 604 | kfree(sbi->s_prefix); |
605 | mutex_destroy(&sbi->s_bmlock); | ||
605 | kfree(sbi); | 606 | kfree(sbi); |
606 | } | 607 | } |
607 | } | 608 | } |
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 06e14bfb3496..dbc732e9a5c0 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c | |||
@@ -306,8 +306,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg, | |||
306 | 306 | ||
307 | _debug("- range %u-%u%s", | 307 | _debug("- range %u-%u%s", |
308 | offset, to, msg->msg_flags ? " [more]" : ""); | 308 | offset, to, msg->msg_flags ? " [more]" : ""); |
309 | iov_iter_init(&msg->msg_iter, WRITE, | 309 | iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, |
310 | (struct iovec *) iov, 1, to - offset); | 310 | iov, 1, to - offset); |
311 | 311 | ||
312 | /* have to change the state *before* sending the last | 312 | /* have to change the state *before* sending the last |
313 | * packet as RxRPC might give us the reply before it | 313 | * packet as RxRPC might give us the reply before it |
@@ -384,7 +384,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, | |||
384 | 384 | ||
385 | msg.msg_name = NULL; | 385 | msg.msg_name = NULL; |
386 | msg.msg_namelen = 0; | 386 | msg.msg_namelen = 0; |
387 | iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)iov, 1, | 387 | iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, |
388 | call->request_size); | 388 | call->request_size); |
389 | msg.msg_control = NULL; | 389 | msg.msg_control = NULL; |
390 | msg.msg_controllen = 0; | 390 | msg.msg_controllen = 0; |
@@ -770,7 +770,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, | |||
770 | void afs_send_empty_reply(struct afs_call *call) | 770 | void afs_send_empty_reply(struct afs_call *call) |
771 | { | 771 | { |
772 | struct msghdr msg; | 772 | struct msghdr msg; |
773 | struct iovec iov[1]; | 773 | struct kvec iov[1]; |
774 | 774 | ||
775 | _enter(""); | 775 | _enter(""); |
776 | 776 | ||
@@ -778,7 +778,7 @@ void afs_send_empty_reply(struct afs_call *call) | |||
778 | iov[0].iov_len = 0; | 778 | iov[0].iov_len = 0; |
779 | msg.msg_name = NULL; | 779 | msg.msg_name = NULL; |
780 | msg.msg_namelen = 0; | 780 | msg.msg_namelen = 0; |
781 | iov_iter_init(&msg.msg_iter, WRITE, iov, 0, 0); /* WTF? */ | 781 | iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 0, 0); /* WTF? */ |
782 | msg.msg_control = NULL; | 782 | msg.msg_control = NULL; |
783 | msg.msg_controllen = 0; | 783 | msg.msg_controllen = 0; |
784 | msg.msg_flags = 0; | 784 | msg.msg_flags = 0; |
@@ -805,7 +805,7 @@ void afs_send_empty_reply(struct afs_call *call) | |||
805 | void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) | 805 | void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) |
806 | { | 806 | { |
807 | struct msghdr msg; | 807 | struct msghdr msg; |
808 | struct iovec iov[1]; | 808 | struct kvec iov[1]; |
809 | int n; | 809 | int n; |
810 | 810 | ||
811 | _enter(""); | 811 | _enter(""); |
@@ -814,7 +814,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) | |||
814 | iov[0].iov_len = len; | 814 | iov[0].iov_len = len; |
815 | msg.msg_name = NULL; | 815 | msg.msg_name = NULL; |
816 | msg.msg_namelen = 0; | 816 | msg.msg_namelen = 0; |
817 | iov_iter_init(&msg.msg_iter, WRITE, iov, 1, len); | 817 | iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, len); |
818 | msg.msg_control = NULL; | 818 | msg.msg_control = NULL; |
819 | msg.msg_controllen = 0; | 819 | msg.msg_controllen = 0; |
820 | msg.msg_flags = 0; | 820 | msg.msg_flags = 0; |
diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 2b607257820c..d142a2449e65 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c | |||
@@ -106,7 +106,7 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) | |||
106 | volume->cell = params->cell; | 106 | volume->cell = params->cell; |
107 | volume->vid = vlocation->vldb.vid[params->type]; | 107 | volume->vid = vlocation->vldb.vid[params->type]; |
108 | 108 | ||
109 | ret = bdi_setup_and_register(&volume->bdi, "afs", BDI_CAP_MAP_COPY); | 109 | ret = bdi_setup_and_register(&volume->bdi, "afs"); |
110 | if (ret) | 110 | if (ret) |
111 | goto error_bdi; | 111 | goto error_bdi; |
112 | 112 | ||
@@ -165,15 +165,6 @@ static struct vfsmount *aio_mnt; | |||
165 | static const struct file_operations aio_ring_fops; | 165 | static const struct file_operations aio_ring_fops; |
166 | static const struct address_space_operations aio_ctx_aops; | 166 | static const struct address_space_operations aio_ctx_aops; |
167 | 167 | ||
168 | /* Backing dev info for aio fs. | ||
169 | * -no dirty page accounting or writeback happens | ||
170 | */ | ||
171 | static struct backing_dev_info aio_fs_backing_dev_info = { | ||
172 | .name = "aiofs", | ||
173 | .state = 0, | ||
174 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_MAP_COPY, | ||
175 | }; | ||
176 | |||
177 | static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) | 168 | static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) |
178 | { | 169 | { |
179 | struct qstr this = QSTR_INIT("[aio]", 5); | 170 | struct qstr this = QSTR_INIT("[aio]", 5); |
@@ -185,7 +176,6 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) | |||
185 | 176 | ||
186 | inode->i_mapping->a_ops = &aio_ctx_aops; | 177 | inode->i_mapping->a_ops = &aio_ctx_aops; |
187 | inode->i_mapping->private_data = ctx; | 178 | inode->i_mapping->private_data = ctx; |
188 | inode->i_mapping->backing_dev_info = &aio_fs_backing_dev_info; | ||
189 | inode->i_size = PAGE_SIZE * nr_pages; | 179 | inode->i_size = PAGE_SIZE * nr_pages; |
190 | 180 | ||
191 | path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); | 181 | path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); |
@@ -230,9 +220,6 @@ static int __init aio_setup(void) | |||
230 | if (IS_ERR(aio_mnt)) | 220 | if (IS_ERR(aio_mnt)) |
231 | panic("Failed to create aio fs mount."); | 221 | panic("Failed to create aio fs mount."); |
232 | 222 | ||
233 | if (bdi_init(&aio_fs_backing_dev_info)) | ||
234 | panic("Failed to init aio fs backing dev info."); | ||
235 | |||
236 | kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); | 223 | kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); |
237 | kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); | 224 | kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); |
238 | 225 | ||
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index edf47774b03d..e089f1985fca 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -274,9 +274,9 @@ more: | |||
274 | static struct inode * | 274 | static struct inode * |
275 | befs_alloc_inode(struct super_block *sb) | 275 | befs_alloc_inode(struct super_block *sb) |
276 | { | 276 | { |
277 | struct befs_inode_info *bi; | 277 | struct befs_inode_info *bi; |
278 | bi = (struct befs_inode_info *)kmem_cache_alloc(befs_inode_cachep, | 278 | |
279 | GFP_KERNEL); | 279 | bi = kmem_cache_alloc(befs_inode_cachep, GFP_KERNEL); |
280 | if (!bi) | 280 | if (!bi) |
281 | return NULL; | 281 | return NULL; |
282 | return &bi->vfs_inode; | 282 | return &bi->vfs_inode; |
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c deleted file mode 100644 index 4e00ed68d4a6..000000000000 --- a/fs/binfmt_som.c +++ /dev/null | |||
@@ -1,299 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/binfmt_som.c | ||
3 | * | ||
4 | * These are the functions used to load SOM format executables as used | ||
5 | * by HP-UX. | ||
6 | * | ||
7 | * Copyright 1999 Matthew Wilcox <willy@bofh.ai> | ||
8 | * based on binfmt_elf which is | ||
9 | * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com). | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | |||
14 | #include <linux/fs.h> | ||
15 | #include <linux/stat.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/mm.h> | ||
18 | #include <linux/mman.h> | ||
19 | #include <linux/errno.h> | ||
20 | #include <linux/signal.h> | ||
21 | #include <linux/binfmts.h> | ||
22 | #include <linux/som.h> | ||
23 | #include <linux/string.h> | ||
24 | #include <linux/file.h> | ||
25 | #include <linux/fcntl.h> | ||
26 | #include <linux/ptrace.h> | ||
27 | #include <linux/slab.h> | ||
28 | #include <linux/shm.h> | ||
29 | #include <linux/personality.h> | ||
30 | #include <linux/init.h> | ||
31 | |||
32 | #include <asm/uaccess.h> | ||
33 | #include <asm/pgtable.h> | ||
34 | |||
35 | |||
36 | #include <linux/elf.h> | ||
37 | |||
38 | static int load_som_binary(struct linux_binprm * bprm); | ||
39 | static int load_som_library(struct file *); | ||
40 | |||
41 | /* | ||
42 | * If we don't support core dumping, then supply a NULL so we | ||
43 | * don't even try. | ||
44 | */ | ||
45 | #if 0 | ||
46 | static int som_core_dump(struct coredump_params *cprm); | ||
47 | #else | ||
48 | #define som_core_dump NULL | ||
49 | #endif | ||
50 | |||
51 | #define SOM_PAGESTART(_v) ((_v) & ~(unsigned long)(SOM_PAGESIZE-1)) | ||
52 | #define SOM_PAGEOFFSET(_v) ((_v) & (SOM_PAGESIZE-1)) | ||
53 | #define SOM_PAGEALIGN(_v) (((_v) + SOM_PAGESIZE - 1) & ~(SOM_PAGESIZE - 1)) | ||
54 | |||
55 | static struct linux_binfmt som_format = { | ||
56 | .module = THIS_MODULE, | ||
57 | .load_binary = load_som_binary, | ||
58 | .load_shlib = load_som_library, | ||
59 | .core_dump = som_core_dump, | ||
60 | .min_coredump = SOM_PAGESIZE | ||
61 | }; | ||
62 | |||
63 | /* | ||
64 | * create_som_tables() parses the env- and arg-strings in new user | ||
65 | * memory and creates the pointer tables from them, and puts their | ||
66 | * addresses on the "stack", returning the new stack pointer value. | ||
67 | */ | ||
68 | static void create_som_tables(struct linux_binprm *bprm) | ||
69 | { | ||
70 | char **argv, **envp; | ||
71 | int argc = bprm->argc; | ||
72 | int envc = bprm->envc; | ||
73 | unsigned long p; | ||
74 | unsigned long *sp; | ||
75 | |||
76 | /* Word-align the stack pointer */ | ||
77 | sp = (unsigned long *)((bprm->p + 3) & ~3); | ||
78 | |||
79 | envp = (char **) sp; | ||
80 | sp += envc + 1; | ||
81 | argv = (char **) sp; | ||
82 | sp += argc + 1; | ||
83 | |||
84 | __put_user((unsigned long) envp,++sp); | ||
85 | __put_user((unsigned long) argv,++sp); | ||
86 | |||
87 | __put_user(argc, ++sp); | ||
88 | |||
89 | bprm->p = (unsigned long) sp; | ||
90 | |||
91 | p = current->mm->arg_start; | ||
92 | while (argc-- > 0) { | ||
93 | __put_user((char *)p,argv++); | ||
94 | p += strlen_user((char *)p); | ||
95 | } | ||
96 | __put_user(NULL, argv); | ||
97 | current->mm->arg_end = current->mm->env_start = p; | ||
98 | while (envc-- > 0) { | ||
99 | __put_user((char *)p,envp++); | ||
100 | p += strlen_user((char *)p); | ||
101 | } | ||
102 | __put_user(NULL, envp); | ||
103 | current->mm->env_end = p; | ||
104 | } | ||
105 | |||
106 | static int check_som_header(struct som_hdr *som_ex) | ||
107 | { | ||
108 | int *buf = (int *)som_ex; | ||
109 | int i, ck; | ||
110 | |||
111 | if (som_ex->system_id != SOM_SID_PARISC_1_0 && | ||
112 | som_ex->system_id != SOM_SID_PARISC_1_1 && | ||
113 | som_ex->system_id != SOM_SID_PARISC_2_0) | ||
114 | return -ENOEXEC; | ||
115 | |||
116 | if (som_ex->a_magic != SOM_EXEC_NONSHARE && | ||
117 | som_ex->a_magic != SOM_EXEC_SHARE && | ||
118 | som_ex->a_magic != SOM_EXEC_DEMAND) | ||
119 | return -ENOEXEC; | ||
120 | |||
121 | if (som_ex->version_id != SOM_ID_OLD && | ||
122 | som_ex->version_id != SOM_ID_NEW) | ||
123 | return -ENOEXEC; | ||
124 | |||
125 | ck = 0; | ||
126 | for (i=0; i<32; i++) | ||
127 | ck ^= buf[i]; | ||
128 | if (ck != 0) | ||
129 | return -ENOEXEC; | ||
130 | |||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | static int map_som_binary(struct file *file, | ||
135 | const struct som_exec_auxhdr *hpuxhdr) | ||
136 | { | ||
137 | unsigned long code_start, code_size, data_start, data_size; | ||
138 | unsigned long bss_start, som_brk; | ||
139 | int retval; | ||
140 | int prot = PROT_READ | PROT_EXEC; | ||
141 | int flags = MAP_FIXED|MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE; | ||
142 | |||
143 | mm_segment_t old_fs = get_fs(); | ||
144 | set_fs(get_ds()); | ||
145 | |||
146 | code_start = SOM_PAGESTART(hpuxhdr->exec_tmem); | ||
147 | code_size = SOM_PAGEALIGN(hpuxhdr->exec_tsize); | ||
148 | current->mm->start_code = code_start; | ||
149 | current->mm->end_code = code_start + code_size; | ||
150 | retval = vm_mmap(file, code_start, code_size, prot, | ||
151 | flags, SOM_PAGESTART(hpuxhdr->exec_tfile)); | ||
152 | if (retval < 0 && retval > -1024) | ||
153 | goto out; | ||
154 | |||
155 | data_start = SOM_PAGESTART(hpuxhdr->exec_dmem); | ||
156 | data_size = SOM_PAGEALIGN(hpuxhdr->exec_dsize); | ||
157 | current->mm->start_data = data_start; | ||
158 | current->mm->end_data = bss_start = data_start + data_size; | ||
159 | retval = vm_mmap(file, data_start, data_size, | ||
160 | prot | PROT_WRITE, flags, | ||
161 | SOM_PAGESTART(hpuxhdr->exec_dfile)); | ||
162 | if (retval < 0 && retval > -1024) | ||
163 | goto out; | ||
164 | |||
165 | som_brk = bss_start + SOM_PAGEALIGN(hpuxhdr->exec_bsize); | ||
166 | current->mm->start_brk = current->mm->brk = som_brk; | ||
167 | retval = vm_mmap(NULL, bss_start, som_brk - bss_start, | ||
168 | prot | PROT_WRITE, MAP_FIXED | MAP_PRIVATE, 0); | ||
169 | if (retval > 0 || retval < -1024) | ||
170 | retval = 0; | ||
171 | out: | ||
172 | set_fs(old_fs); | ||
173 | return retval; | ||
174 | } | ||
175 | |||
176 | |||
177 | /* | ||
178 | * These are the functions used to load SOM executables and shared | ||
179 | * libraries. There is no binary dependent code anywhere else. | ||
180 | */ | ||
181 | |||
182 | static int | ||
183 | load_som_binary(struct linux_binprm * bprm) | ||
184 | { | ||
185 | int retval; | ||
186 | unsigned int size; | ||
187 | unsigned long som_entry; | ||
188 | struct som_hdr *som_ex; | ||
189 | struct som_exec_auxhdr *hpuxhdr; | ||
190 | struct pt_regs *regs = current_pt_regs(); | ||
191 | |||
192 | /* Get the exec-header */ | ||
193 | som_ex = (struct som_hdr *) bprm->buf; | ||
194 | |||
195 | retval = check_som_header(som_ex); | ||
196 | if (retval != 0) | ||
197 | goto out; | ||
198 | |||
199 | /* Now read in the auxiliary header information */ | ||
200 | |||
201 | retval = -ENOMEM; | ||
202 | size = som_ex->aux_header_size; | ||
203 | if (size > SOM_PAGESIZE) | ||
204 | goto out; | ||
205 | hpuxhdr = kmalloc(size, GFP_KERNEL); | ||
206 | if (!hpuxhdr) | ||
207 | goto out; | ||
208 | |||
209 | retval = kernel_read(bprm->file, som_ex->aux_header_location, | ||
210 | (char *) hpuxhdr, size); | ||
211 | if (retval != size) { | ||
212 | if (retval >= 0) | ||
213 | retval = -EIO; | ||
214 | goto out_free; | ||
215 | } | ||
216 | |||
217 | /* Flush all traces of the currently running executable */ | ||
218 | retval = flush_old_exec(bprm); | ||
219 | if (retval) | ||
220 | goto out_free; | ||
221 | |||
222 | /* OK, This is the point of no return */ | ||
223 | current->personality = PER_HPUX; | ||
224 | setup_new_exec(bprm); | ||
225 | |||
226 | /* Set the task size for HP-UX processes such that | ||
227 | * the gateway page is outside the address space. | ||
228 | * This can be fixed later, but for now, this is much | ||
229 | * easier. | ||
230 | */ | ||
231 | |||
232 | current->thread.task_size = 0xc0000000; | ||
233 | |||
234 | /* Set map base to allow enough room for hp-ux heap growth */ | ||
235 | |||
236 | current->thread.map_base = 0x80000000; | ||
237 | |||
238 | retval = map_som_binary(bprm->file, hpuxhdr); | ||
239 | if (retval < 0) | ||
240 | goto out_free; | ||
241 | |||
242 | som_entry = hpuxhdr->exec_entry; | ||
243 | kfree(hpuxhdr); | ||
244 | |||
245 | set_binfmt(&som_format); | ||
246 | install_exec_creds(bprm); | ||
247 | setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); | ||
248 | |||
249 | create_som_tables(bprm); | ||
250 | |||
251 | current->mm->start_stack = bprm->p; | ||
252 | |||
253 | #if 0 | ||
254 | printk("(start_brk) %08lx\n" , (unsigned long) current->mm->start_brk); | ||
255 | printk("(end_code) %08lx\n" , (unsigned long) current->mm->end_code); | ||
256 | printk("(start_code) %08lx\n" , (unsigned long) current->mm->start_code); | ||
257 | printk("(end_data) %08lx\n" , (unsigned long) current->mm->end_data); | ||
258 | printk("(start_stack) %08lx\n" , (unsigned long) current->mm->start_stack); | ||
259 | printk("(brk) %08lx\n" , (unsigned long) current->mm->brk); | ||
260 | #endif | ||
261 | |||
262 | map_hpux_gateway_page(current,current->mm); | ||
263 | |||
264 | start_thread_som(regs, som_entry, bprm->p); | ||
265 | return 0; | ||
266 | |||
267 | /* error cleanup */ | ||
268 | out_free: | ||
269 | kfree(hpuxhdr); | ||
270 | out: | ||
271 | return retval; | ||
272 | } | ||
273 | |||
274 | static int load_som_library(struct file *f) | ||
275 | { | ||
276 | /* No lib support in SOM yet. gizza chance.. */ | ||
277 | return -ENOEXEC; | ||
278 | } | ||
279 | /* Install the SOM loader. | ||
280 | * N.B. We *rely* on the table being the right size with the | ||
281 | * right number of free slots... | ||
282 | */ | ||
283 | |||
284 | static int __init init_som_binfmt(void) | ||
285 | { | ||
286 | register_binfmt(&som_format); | ||
287 | return 0; | ||
288 | } | ||
289 | |||
290 | static void __exit exit_som_binfmt(void) | ||
291 | { | ||
292 | /* Remove the SOM loader. */ | ||
293 | unregister_binfmt(&som_format); | ||
294 | } | ||
295 | |||
296 | core_initcall(init_som_binfmt); | ||
297 | module_exit(exit_som_binfmt); | ||
298 | |||
299 | MODULE_LICENSE("GPL"); | ||
diff --git a/fs/block_dev.c b/fs/block_dev.c index b48c41bf0f86..975266be67d3 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -49,23 +49,15 @@ inline struct block_device *I_BDEV(struct inode *inode) | |||
49 | } | 49 | } |
50 | EXPORT_SYMBOL(I_BDEV); | 50 | EXPORT_SYMBOL(I_BDEV); |
51 | 51 | ||
52 | /* | 52 | static void bdev_write_inode(struct inode *inode) |
53 | * Move the inode from its current bdi to a new bdi. Make sure the inode | ||
54 | * is clean before moving so that it doesn't linger on the old bdi. | ||
55 | */ | ||
56 | static void bdev_inode_switch_bdi(struct inode *inode, | ||
57 | struct backing_dev_info *dst) | ||
58 | { | 53 | { |
59 | while (true) { | 54 | spin_lock(&inode->i_lock); |
60 | spin_lock(&inode->i_lock); | 55 | while (inode->i_state & I_DIRTY) { |
61 | if (!(inode->i_state & I_DIRTY)) { | ||
62 | inode->i_data.backing_dev_info = dst; | ||
63 | spin_unlock(&inode->i_lock); | ||
64 | return; | ||
65 | } | ||
66 | spin_unlock(&inode->i_lock); | 56 | spin_unlock(&inode->i_lock); |
67 | WARN_ON_ONCE(write_inode_now(inode, true)); | 57 | WARN_ON_ONCE(write_inode_now(inode, true)); |
58 | spin_lock(&inode->i_lock); | ||
68 | } | 59 | } |
60 | spin_unlock(&inode->i_lock); | ||
69 | } | 61 | } |
70 | 62 | ||
71 | /* Kill _all_ buffers and pagecache , dirty or not.. */ | 63 | /* Kill _all_ buffers and pagecache , dirty or not.. */ |
@@ -429,6 +421,46 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, | |||
429 | } | 421 | } |
430 | EXPORT_SYMBOL_GPL(bdev_write_page); | 422 | EXPORT_SYMBOL_GPL(bdev_write_page); |
431 | 423 | ||
424 | /** | ||
425 | * bdev_direct_access() - Get the address for directly-accessibly memory | ||
426 | * @bdev: The device containing the memory | ||
427 | * @sector: The offset within the device | ||
428 | * @addr: Where to put the address of the memory | ||
429 | * @pfn: The Page Frame Number for the memory | ||
430 | * @size: The number of bytes requested | ||
431 | * | ||
432 | * If a block device is made up of directly addressable memory, this function | ||
433 | * will tell the caller the PFN and the address of the memory. The address | ||
434 | * may be directly dereferenced within the kernel without the need to call | ||
435 | * ioremap(), kmap() or similar. The PFN is suitable for inserting into | ||
436 | * page tables. | ||
437 | * | ||
438 | * Return: negative errno if an error occurs, otherwise the number of bytes | ||
439 | * accessible at this address. | ||
440 | */ | ||
441 | long bdev_direct_access(struct block_device *bdev, sector_t sector, | ||
442 | void **addr, unsigned long *pfn, long size) | ||
443 | { | ||
444 | long avail; | ||
445 | const struct block_device_operations *ops = bdev->bd_disk->fops; | ||
446 | |||
447 | if (size < 0) | ||
448 | return size; | ||
449 | if (!ops->direct_access) | ||
450 | return -EOPNOTSUPP; | ||
451 | if ((sector + DIV_ROUND_UP(size, 512)) > | ||
452 | part_nr_sects_read(bdev->bd_part)) | ||
453 | return -ERANGE; | ||
454 | sector += get_start_sect(bdev); | ||
455 | if (sector % (PAGE_SIZE / 512)) | ||
456 | return -EINVAL; | ||
457 | avail = ops->direct_access(bdev, sector, addr, pfn, size); | ||
458 | if (!avail) | ||
459 | return -ERANGE; | ||
460 | return min(avail, size); | ||
461 | } | ||
462 | EXPORT_SYMBOL_GPL(bdev_direct_access); | ||
463 | |||
432 | /* | 464 | /* |
433 | * pseudo-fs | 465 | * pseudo-fs |
434 | */ | 466 | */ |
@@ -584,7 +616,6 @@ struct block_device *bdget(dev_t dev) | |||
584 | inode->i_bdev = bdev; | 616 | inode->i_bdev = bdev; |
585 | inode->i_data.a_ops = &def_blk_aops; | 617 | inode->i_data.a_ops = &def_blk_aops; |
586 | mapping_set_gfp_mask(&inode->i_data, GFP_USER); | 618 | mapping_set_gfp_mask(&inode->i_data, GFP_USER); |
587 | inode->i_data.backing_dev_info = &default_backing_dev_info; | ||
588 | spin_lock(&bdev_lock); | 619 | spin_lock(&bdev_lock); |
589 | list_add(&bdev->bd_list, &all_bdevs); | 620 | list_add(&bdev->bd_list, &all_bdevs); |
590 | spin_unlock(&bdev_lock); | 621 | spin_unlock(&bdev_lock); |
@@ -1145,8 +1176,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1145 | bdev->bd_queue = disk->queue; | 1176 | bdev->bd_queue = disk->queue; |
1146 | bdev->bd_contains = bdev; | 1177 | bdev->bd_contains = bdev; |
1147 | if (!partno) { | 1178 | if (!partno) { |
1148 | struct backing_dev_info *bdi; | ||
1149 | |||
1150 | ret = -ENXIO; | 1179 | ret = -ENXIO; |
1151 | bdev->bd_part = disk_get_part(disk, partno); | 1180 | bdev->bd_part = disk_get_part(disk, partno); |
1152 | if (!bdev->bd_part) | 1181 | if (!bdev->bd_part) |
@@ -1172,11 +1201,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1172 | } | 1201 | } |
1173 | } | 1202 | } |
1174 | 1203 | ||
1175 | if (!ret) { | 1204 | if (!ret) |
1176 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); | 1205 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); |
1177 | bdi = blk_get_backing_dev_info(bdev); | ||
1178 | bdev_inode_switch_bdi(bdev->bd_inode, bdi); | ||
1179 | } | ||
1180 | 1206 | ||
1181 | /* | 1207 | /* |
1182 | * If the device is invalidated, rescan partition | 1208 | * If the device is invalidated, rescan partition |
@@ -1203,8 +1229,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1203 | if (ret) | 1229 | if (ret) |
1204 | goto out_clear; | 1230 | goto out_clear; |
1205 | bdev->bd_contains = whole; | 1231 | bdev->bd_contains = whole; |
1206 | bdev_inode_switch_bdi(bdev->bd_inode, | ||
1207 | whole->bd_inode->i_data.backing_dev_info); | ||
1208 | bdev->bd_part = disk_get_part(disk, partno); | 1232 | bdev->bd_part = disk_get_part(disk, partno); |
1209 | if (!(disk->flags & GENHD_FL_UP) || | 1233 | if (!(disk->flags & GENHD_FL_UP) || |
1210 | !bdev->bd_part || !bdev->bd_part->nr_sects) { | 1234 | !bdev->bd_part || !bdev->bd_part->nr_sects) { |
@@ -1244,7 +1268,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1244 | bdev->bd_disk = NULL; | 1268 | bdev->bd_disk = NULL; |
1245 | bdev->bd_part = NULL; | 1269 | bdev->bd_part = NULL; |
1246 | bdev->bd_queue = NULL; | 1270 | bdev->bd_queue = NULL; |
1247 | bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info); | ||
1248 | if (bdev != bdev->bd_contains) | 1271 | if (bdev != bdev->bd_contains) |
1249 | __blkdev_put(bdev->bd_contains, mode, 1); | 1272 | __blkdev_put(bdev->bd_contains, mode, 1); |
1250 | bdev->bd_contains = NULL; | 1273 | bdev->bd_contains = NULL; |
@@ -1464,11 +1487,11 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) | |||
1464 | WARN_ON_ONCE(bdev->bd_holders); | 1487 | WARN_ON_ONCE(bdev->bd_holders); |
1465 | sync_blockdev(bdev); | 1488 | sync_blockdev(bdev); |
1466 | kill_bdev(bdev); | 1489 | kill_bdev(bdev); |
1467 | /* ->release can cause the old bdi to disappear, | 1490 | /* |
1468 | * so must switch it out first | 1491 | * ->release can cause the queue to disappear, so flush all |
1492 | * dirty data before. | ||
1469 | */ | 1493 | */ |
1470 | bdev_inode_switch_bdi(bdev->bd_inode, | 1494 | bdev_write_inode(bdev->bd_inode); |
1471 | &default_backing_dev_info); | ||
1472 | } | 1495 | } |
1473 | if (bdev->bd_contains == bdev) { | 1496 | if (bdev->bd_contains == bdev) { |
1474 | if (disk->fops->release) | 1497 | if (disk->fops->release) |
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index a66768ebc8d1..80e9c18ea64f 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig | |||
@@ -8,6 +8,7 @@ config BTRFS_FS | |||
8 | select LZO_DECOMPRESS | 8 | select LZO_DECOMPRESS |
9 | select RAID6_PQ | 9 | select RAID6_PQ |
10 | select XOR_BLOCKS | 10 | select XOR_BLOCKS |
11 | select SRCU | ||
11 | 12 | ||
12 | help | 13 | help |
13 | Btrfs is a general purpose copy-on-write filesystem with extents, | 14 | Btrfs is a general purpose copy-on-write filesystem with extents, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8c63419a7f70..1afb18226da8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1715,12 +1715,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
1715 | { | 1715 | { |
1716 | int err; | 1716 | int err; |
1717 | 1717 | ||
1718 | bdi->capabilities = BDI_CAP_MAP_COPY; | 1718 | err = bdi_setup_and_register(bdi, "btrfs"); |
1719 | err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY); | ||
1720 | if (err) | 1719 | if (err) |
1721 | return err; | 1720 | return err; |
1722 | 1721 | ||
1723 | bdi->ra_pages = default_backing_dev_info.ra_pages; | 1722 | bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE; |
1724 | bdi->congested_fn = btrfs_congested_fn; | 1723 | bdi->congested_fn = btrfs_congested_fn; |
1725 | bdi->congested_data = info; | 1724 | bdi->congested_data = info; |
1726 | return 0; | 1725 | return 0; |
@@ -2319,7 +2318,6 @@ int open_ctree(struct super_block *sb, | |||
2319 | */ | 2318 | */ |
2320 | fs_info->btree_inode->i_size = OFFSET_MAX; | 2319 | fs_info->btree_inode->i_size = OFFSET_MAX; |
2321 | fs_info->btree_inode->i_mapping->a_ops = &btree_aops; | 2320 | fs_info->btree_inode->i_mapping->a_ops = &btree_aops; |
2322 | fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; | ||
2323 | 2321 | ||
2324 | RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); | 2322 | RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); |
2325 | extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, | 2323 | extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 790dbae3343c..c73df6a7c9b6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -1407,8 +1407,8 @@ int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) | |||
1407 | while (index <= end_index) { | 1407 | while (index <= end_index) { |
1408 | page = find_get_page(inode->i_mapping, index); | 1408 | page = find_get_page(inode->i_mapping, index); |
1409 | BUG_ON(!page); /* Pages should be in the extent_io_tree */ | 1409 | BUG_ON(!page); /* Pages should be in the extent_io_tree */ |
1410 | account_page_redirty(page); | ||
1411 | __set_page_dirty_nobuffers(page); | 1410 | __set_page_dirty_nobuffers(page); |
1411 | account_page_redirty(page); | ||
1412 | page_cache_release(page); | 1412 | page_cache_release(page); |
1413 | index++; | 1413 | index++; |
1414 | } | 1414 | } |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e4090259569b..b78bbbac900d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1746,7 +1746,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, | |||
1746 | 1746 | ||
1747 | mutex_lock(&inode->i_mutex); | 1747 | mutex_lock(&inode->i_mutex); |
1748 | 1748 | ||
1749 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 1749 | current->backing_dev_info = inode_to_bdi(inode); |
1750 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 1750 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
1751 | if (err) { | 1751 | if (err) { |
1752 | mutex_unlock(&inode->i_mutex); | 1752 | mutex_unlock(&inode->i_mutex); |
@@ -2081,7 +2081,6 @@ static const struct vm_operations_struct btrfs_file_vm_ops = { | |||
2081 | .fault = filemap_fault, | 2081 | .fault = filemap_fault, |
2082 | .map_pages = filemap_map_pages, | 2082 | .map_pages = filemap_map_pages, |
2083 | .page_mkwrite = btrfs_page_mkwrite, | 2083 | .page_mkwrite = btrfs_page_mkwrite, |
2084 | .remap_pages = generic_file_remap_pages, | ||
2085 | }; | 2084 | }; |
2086 | 2085 | ||
2087 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | 2086 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8bf326affb94..54bcf639d1cf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -3608,7 +3608,6 @@ cache_acl: | |||
3608 | switch (inode->i_mode & S_IFMT) { | 3608 | switch (inode->i_mode & S_IFMT) { |
3609 | case S_IFREG: | 3609 | case S_IFREG: |
3610 | inode->i_mapping->a_ops = &btrfs_aops; | 3610 | inode->i_mapping->a_ops = &btrfs_aops; |
3611 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
3612 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 3611 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
3613 | inode->i_fop = &btrfs_file_operations; | 3612 | inode->i_fop = &btrfs_file_operations; |
3614 | inode->i_op = &btrfs_file_inode_operations; | 3613 | inode->i_op = &btrfs_file_inode_operations; |
@@ -3623,7 +3622,6 @@ cache_acl: | |||
3623 | case S_IFLNK: | 3622 | case S_IFLNK: |
3624 | inode->i_op = &btrfs_symlink_inode_operations; | 3623 | inode->i_op = &btrfs_symlink_inode_operations; |
3625 | inode->i_mapping->a_ops = &btrfs_symlink_aops; | 3624 | inode->i_mapping->a_ops = &btrfs_symlink_aops; |
3626 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
3627 | break; | 3625 | break; |
3628 | default: | 3626 | default: |
3629 | inode->i_op = &btrfs_special_inode_operations; | 3627 | inode->i_op = &btrfs_special_inode_operations; |
@@ -6088,7 +6086,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
6088 | inode->i_fop = &btrfs_file_operations; | 6086 | inode->i_fop = &btrfs_file_operations; |
6089 | inode->i_op = &btrfs_file_inode_operations; | 6087 | inode->i_op = &btrfs_file_inode_operations; |
6090 | inode->i_mapping->a_ops = &btrfs_aops; | 6088 | inode->i_mapping->a_ops = &btrfs_aops; |
6091 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
6092 | 6089 | ||
6093 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); | 6090 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
6094 | if (err) | 6091 | if (err) |
@@ -9203,7 +9200,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
9203 | inode->i_fop = &btrfs_file_operations; | 9200 | inode->i_fop = &btrfs_file_operations; |
9204 | inode->i_op = &btrfs_file_inode_operations; | 9201 | inode->i_op = &btrfs_file_inode_operations; |
9205 | inode->i_mapping->a_ops = &btrfs_aops; | 9202 | inode->i_mapping->a_ops = &btrfs_aops; |
9206 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
9207 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 9203 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
9208 | 9204 | ||
9209 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); | 9205 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
@@ -9247,7 +9243,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
9247 | 9243 | ||
9248 | inode->i_op = &btrfs_symlink_inode_operations; | 9244 | inode->i_op = &btrfs_symlink_inode_operations; |
9249 | inode->i_mapping->a_ops = &btrfs_symlink_aops; | 9245 | inode->i_mapping->a_ops = &btrfs_symlink_aops; |
9250 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
9251 | inode_set_bytes(inode, name_len); | 9246 | inode_set_bytes(inode, name_len); |
9252 | btrfs_i_size_write(inode, name_len); | 9247 | btrfs_i_size_write(inode, name_len); |
9253 | err = btrfs_update_inode(trans, root, inode); | 9248 | err = btrfs_update_inode(trans, root, inode); |
@@ -9459,7 +9454,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
9459 | inode->i_op = &btrfs_file_inode_operations; | 9454 | inode->i_op = &btrfs_file_inode_operations; |
9460 | 9455 | ||
9461 | inode->i_mapping->a_ops = &btrfs_aops; | 9456 | inode->i_mapping->a_ops = &btrfs_aops; |
9462 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
9463 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 9457 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
9464 | 9458 | ||
9465 | ret = btrfs_init_inode_security(trans, inode, dir, NULL); | 9459 | ret = btrfs_init_inode_security(trans, inode, dir, NULL); |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 7d05e37874d4..fd5599d32362 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -1574,7 +1574,6 @@ out: | |||
1574 | static struct vm_operations_struct ceph_vmops = { | 1574 | static struct vm_operations_struct ceph_vmops = { |
1575 | .fault = ceph_filemap_fault, | 1575 | .fault = ceph_filemap_fault, |
1576 | .page_mkwrite = ceph_page_mkwrite, | 1576 | .page_mkwrite = ceph_page_mkwrite, |
1577 | .remap_pages = generic_file_remap_pages, | ||
1578 | }; | 1577 | }; |
1579 | 1578 | ||
1580 | int ceph_mmap(struct file *file, struct vm_area_struct *vma) | 1579 | int ceph_mmap(struct file *file, struct vm_area_struct *vma) |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 848969ee24db..a3d774b35149 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -952,7 +952,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
952 | mutex_lock(&inode->i_mutex); | 952 | mutex_lock(&inode->i_mutex); |
953 | 953 | ||
954 | /* We can write back this queue in page reclaim */ | 954 | /* We can write back this queue in page reclaim */ |
955 | current->backing_dev_info = file->f_mapping->backing_dev_info; | 955 | current->backing_dev_info = inode_to_bdi(inode); |
956 | 956 | ||
957 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 957 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
958 | if (err) | 958 | if (err) |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index be3af18e4cf1..119c43c80638 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -783,8 +783,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page, | |||
783 | } | 783 | } |
784 | 784 | ||
785 | inode->i_mapping->a_ops = &ceph_aops; | 785 | inode->i_mapping->a_ops = &ceph_aops; |
786 | inode->i_mapping->backing_dev_info = | ||
787 | &ceph_sb_to_client(inode->i_sb)->backing_dev_info; | ||
788 | 786 | ||
789 | switch (inode->i_mode & S_IFMT) { | 787 | switch (inode->i_mode & S_IFMT) { |
790 | case S_IFIFO: | 788 | case S_IFIFO: |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index c35c5c614e38..4347039ecc18 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -239,23 +239,26 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
239 | return err; | 239 | return err; |
240 | } | 240 | } |
241 | 241 | ||
242 | /** | 242 | /* |
243 | * Must be called with lock_flocks() already held. Fills in the passed | 243 | * Fills in the passed counter variables, so you can prepare pagelist metadata |
244 | * counter variables, so you can prepare pagelist metadata before calling | 244 | * before calling ceph_encode_locks. |
245 | * ceph_encode_locks. | ||
246 | */ | 245 | */ |
247 | void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | 246 | void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) |
248 | { | 247 | { |
249 | struct file_lock *lock; | 248 | struct file_lock *lock; |
249 | struct file_lock_context *ctx; | ||
250 | 250 | ||
251 | *fcntl_count = 0; | 251 | *fcntl_count = 0; |
252 | *flock_count = 0; | 252 | *flock_count = 0; |
253 | 253 | ||
254 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | 254 | ctx = inode->i_flctx; |
255 | if (lock->fl_flags & FL_POSIX) | 255 | if (ctx) { |
256 | spin_lock(&ctx->flc_lock); | ||
257 | list_for_each_entry(lock, &ctx->flc_posix, fl_list) | ||
256 | ++(*fcntl_count); | 258 | ++(*fcntl_count); |
257 | else if (lock->fl_flags & FL_FLOCK) | 259 | list_for_each_entry(lock, &ctx->flc_flock, fl_list) |
258 | ++(*flock_count); | 260 | ++(*flock_count); |
261 | spin_unlock(&ctx->flc_lock); | ||
259 | } | 262 | } |
260 | dout("counted %d flock locks and %d fcntl locks", | 263 | dout("counted %d flock locks and %d fcntl locks", |
261 | *flock_count, *fcntl_count); | 264 | *flock_count, *fcntl_count); |
@@ -271,6 +274,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, | |||
271 | int num_fcntl_locks, int num_flock_locks) | 274 | int num_fcntl_locks, int num_flock_locks) |
272 | { | 275 | { |
273 | struct file_lock *lock; | 276 | struct file_lock *lock; |
277 | struct file_lock_context *ctx = inode->i_flctx; | ||
274 | int err = 0; | 278 | int err = 0; |
275 | int seen_fcntl = 0; | 279 | int seen_fcntl = 0; |
276 | int seen_flock = 0; | 280 | int seen_flock = 0; |
@@ -279,33 +283,34 @@ int ceph_encode_locks_to_buffer(struct inode *inode, | |||
279 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, | 283 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, |
280 | num_fcntl_locks); | 284 | num_fcntl_locks); |
281 | 285 | ||
282 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | 286 | if (!ctx) |
283 | if (lock->fl_flags & FL_POSIX) { | 287 | return 0; |
284 | ++seen_fcntl; | 288 | |
285 | if (seen_fcntl > num_fcntl_locks) { | 289 | spin_lock(&ctx->flc_lock); |
286 | err = -ENOSPC; | 290 | list_for_each_entry(lock, &ctx->flc_flock, fl_list) { |
287 | goto fail; | 291 | ++seen_fcntl; |
288 | } | 292 | if (seen_fcntl > num_fcntl_locks) { |
289 | err = lock_to_ceph_filelock(lock, &flocks[l]); | 293 | err = -ENOSPC; |
290 | if (err) | 294 | goto fail; |
291 | goto fail; | ||
292 | ++l; | ||
293 | } | 295 | } |
296 | err = lock_to_ceph_filelock(lock, &flocks[l]); | ||
297 | if (err) | ||
298 | goto fail; | ||
299 | ++l; | ||
294 | } | 300 | } |
295 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | 301 | list_for_each_entry(lock, &ctx->flc_flock, fl_list) { |
296 | if (lock->fl_flags & FL_FLOCK) { | 302 | ++seen_flock; |
297 | ++seen_flock; | 303 | if (seen_flock > num_flock_locks) { |
298 | if (seen_flock > num_flock_locks) { | 304 | err = -ENOSPC; |
299 | err = -ENOSPC; | 305 | goto fail; |
300 | goto fail; | ||
301 | } | ||
302 | err = lock_to_ceph_filelock(lock, &flocks[l]); | ||
303 | if (err) | ||
304 | goto fail; | ||
305 | ++l; | ||
306 | } | 306 | } |
307 | err = lock_to_ceph_filelock(lock, &flocks[l]); | ||
308 | if (err) | ||
309 | goto fail; | ||
310 | ++l; | ||
307 | } | 311 | } |
308 | fail: | 312 | fail: |
313 | spin_unlock(&ctx->flc_lock); | ||
309 | return err; | 314 | return err; |
310 | } | 315 | } |
311 | 316 | ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 4c1e36a171af..71c073f38e54 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -2764,20 +2764,16 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2764 | struct ceph_filelock *flocks; | 2764 | struct ceph_filelock *flocks; |
2765 | 2765 | ||
2766 | encode_again: | 2766 | encode_again: |
2767 | spin_lock(&inode->i_lock); | ||
2768 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); | 2767 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); |
2769 | spin_unlock(&inode->i_lock); | ||
2770 | flocks = kmalloc((num_fcntl_locks+num_flock_locks) * | 2768 | flocks = kmalloc((num_fcntl_locks+num_flock_locks) * |
2771 | sizeof(struct ceph_filelock), GFP_NOFS); | 2769 | sizeof(struct ceph_filelock), GFP_NOFS); |
2772 | if (!flocks) { | 2770 | if (!flocks) { |
2773 | err = -ENOMEM; | 2771 | err = -ENOMEM; |
2774 | goto out_free; | 2772 | goto out_free; |
2775 | } | 2773 | } |
2776 | spin_lock(&inode->i_lock); | ||
2777 | err = ceph_encode_locks_to_buffer(inode, flocks, | 2774 | err = ceph_encode_locks_to_buffer(inode, flocks, |
2778 | num_fcntl_locks, | 2775 | num_fcntl_locks, |
2779 | num_flock_locks); | 2776 | num_flock_locks); |
2780 | spin_unlock(&inode->i_lock); | ||
2781 | if (err) { | 2777 | if (err) { |
2782 | kfree(flocks); | 2778 | kfree(flocks); |
2783 | if (err == -ENOSPC) | 2779 | if (err == -ENOSPC) |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 8f8983f38b82..a63997b8bcff 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -40,17 +40,6 @@ static void ceph_put_super(struct super_block *s) | |||
40 | 40 | ||
41 | dout("put_super\n"); | 41 | dout("put_super\n"); |
42 | ceph_mdsc_close_sessions(fsc->mdsc); | 42 | ceph_mdsc_close_sessions(fsc->mdsc); |
43 | |||
44 | /* | ||
45 | * ensure we release the bdi before put_anon_super releases | ||
46 | * the device name. | ||
47 | */ | ||
48 | if (s->s_bdi == &fsc->backing_dev_info) { | ||
49 | bdi_unregister(&fsc->backing_dev_info); | ||
50 | s->s_bdi = NULL; | ||
51 | } | ||
52 | |||
53 | return; | ||
54 | } | 43 | } |
55 | 44 | ||
56 | static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | 45 | static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) |
@@ -914,7 +903,7 @@ static int ceph_register_bdi(struct super_block *sb, | |||
914 | >> PAGE_SHIFT; | 903 | >> PAGE_SHIFT; |
915 | else | 904 | else |
916 | fsc->backing_dev_info.ra_pages = | 905 | fsc->backing_dev_info.ra_pages = |
917 | default_backing_dev_info.ra_pages; | 906 | VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE; |
918 | 907 | ||
919 | err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld", | 908 | err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld", |
920 | atomic_long_inc_return(&bdi_seq)); | 909 | atomic_long_inc_return(&bdi_seq)); |
@@ -1006,11 +995,16 @@ out_final: | |||
1006 | static void ceph_kill_sb(struct super_block *s) | 995 | static void ceph_kill_sb(struct super_block *s) |
1007 | { | 996 | { |
1008 | struct ceph_fs_client *fsc = ceph_sb_to_client(s); | 997 | struct ceph_fs_client *fsc = ceph_sb_to_client(s); |
998 | dev_t dev = s->s_dev; | ||
999 | |||
1009 | dout("kill_sb %p\n", s); | 1000 | dout("kill_sb %p\n", s); |
1001 | |||
1010 | ceph_mdsc_pre_umount(fsc->mdsc); | 1002 | ceph_mdsc_pre_umount(fsc->mdsc); |
1011 | kill_anon_super(s); /* will call put_super after sb is r/o */ | 1003 | generic_shutdown_super(s); |
1012 | ceph_mdsc_destroy(fsc); | 1004 | ceph_mdsc_destroy(fsc); |
1005 | |||
1013 | destroy_fs_client(fsc); | 1006 | destroy_fs_client(fsc); |
1007 | free_anon_bdev(dev); | ||
1014 | } | 1008 | } |
1015 | 1009 | ||
1016 | static struct file_system_type ceph_fs_type = { | 1010 | static struct file_system_type ceph_fs_type = { |
diff --git a/fs/char_dev.c b/fs/char_dev.c index 67b2007f10fe..ea06a3d0364c 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -24,27 +24,6 @@ | |||
24 | 24 | ||
25 | #include "internal.h" | 25 | #include "internal.h" |
26 | 26 | ||
27 | /* | ||
28 | * capabilities for /dev/mem, /dev/kmem and similar directly mappable character | ||
29 | * devices | ||
30 | * - permits shared-mmap for read, write and/or exec | ||
31 | * - does not permit private mmap in NOMMU mode (can't do COW) | ||
32 | * - no readahead or I/O queue unplugging required | ||
33 | */ | ||
34 | struct backing_dev_info directly_mappable_cdev_bdi = { | ||
35 | .name = "char", | ||
36 | .capabilities = ( | ||
37 | #ifdef CONFIG_MMU | ||
38 | /* permit private copies of the data to be taken */ | ||
39 | BDI_CAP_MAP_COPY | | ||
40 | #endif | ||
41 | /* permit direct mmap, for read, write or exec */ | ||
42 | BDI_CAP_MAP_DIRECT | | ||
43 | BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP | | ||
44 | /* no writeback happens */ | ||
45 | BDI_CAP_NO_ACCT_AND_WRITEBACK), | ||
46 | }; | ||
47 | |||
48 | static struct kobj_map *cdev_map; | 27 | static struct kobj_map *cdev_map; |
49 | 28 | ||
50 | static DEFINE_MUTEX(chrdevs_lock); | 29 | static DEFINE_MUTEX(chrdevs_lock); |
@@ -575,8 +554,6 @@ static struct kobject *base_probe(dev_t dev, int *part, void *data) | |||
575 | void __init chrdev_init(void) | 554 | void __init chrdev_init(void) |
576 | { | 555 | { |
577 | cdev_map = kobj_map_init(base_probe, &chrdevs_lock); | 556 | cdev_map = kobj_map_init(base_probe, &chrdevs_lock); |
578 | if (bdi_init(&directly_mappable_cdev_bdi)) | ||
579 | panic("Failed to init directly mappable cdev bdi"); | ||
580 | } | 557 | } |
581 | 558 | ||
582 | 559 | ||
@@ -590,4 +567,3 @@ EXPORT_SYMBOL(cdev_del); | |||
590 | EXPORT_SYMBOL(cdev_add); | 567 | EXPORT_SYMBOL(cdev_add); |
591 | EXPORT_SYMBOL(__register_chrdev); | 568 | EXPORT_SYMBOL(__register_chrdev); |
592 | EXPORT_SYMBOL(__unregister_chrdev); | 569 | EXPORT_SYMBOL(__unregister_chrdev); |
593 | EXPORT_SYMBOL(directly_mappable_cdev_bdi); | ||
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 2a772da16b83..d3aa999ab785 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -3446,7 +3446,7 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) | |||
3446 | int referral_walks_count = 0; | 3446 | int referral_walks_count = 0; |
3447 | #endif | 3447 | #endif |
3448 | 3448 | ||
3449 | rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); | 3449 | rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs"); |
3450 | if (rc) | 3450 | if (rc) |
3451 | return rc; | 3451 | return rc; |
3452 | 3452 | ||
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 74f12877493a..a94b3e673182 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -1113,11 +1113,6 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
1113 | return rc; | 1113 | return rc; |
1114 | } | 1114 | } |
1115 | 1115 | ||
1116 | /* copied from fs/locks.c with a name change */ | ||
1117 | #define cifs_for_each_lock(inode, lockp) \ | ||
1118 | for (lockp = &inode->i_flock; *lockp != NULL; \ | ||
1119 | lockp = &(*lockp)->fl_next) | ||
1120 | |||
1121 | struct lock_to_push { | 1116 | struct lock_to_push { |
1122 | struct list_head llist; | 1117 | struct list_head llist; |
1123 | __u64 offset; | 1118 | __u64 offset; |
@@ -1132,8 +1127,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
1132 | { | 1127 | { |
1133 | struct inode *inode = cfile->dentry->d_inode; | 1128 | struct inode *inode = cfile->dentry->d_inode; |
1134 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | 1129 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); |
1135 | struct file_lock *flock, **before; | 1130 | struct file_lock *flock; |
1136 | unsigned int count = 0, i = 0; | 1131 | struct file_lock_context *flctx = inode->i_flctx; |
1132 | unsigned int count = 0, i; | ||
1137 | int rc = 0, xid, type; | 1133 | int rc = 0, xid, type; |
1138 | struct list_head locks_to_send, *el; | 1134 | struct list_head locks_to_send, *el; |
1139 | struct lock_to_push *lck, *tmp; | 1135 | struct lock_to_push *lck, *tmp; |
@@ -1141,12 +1137,14 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
1141 | 1137 | ||
1142 | xid = get_xid(); | 1138 | xid = get_xid(); |
1143 | 1139 | ||
1144 | spin_lock(&inode->i_lock); | 1140 | if (!flctx) |
1145 | cifs_for_each_lock(inode, before) { | 1141 | goto out; |
1146 | if ((*before)->fl_flags & FL_POSIX) | 1142 | |
1147 | count++; | 1143 | spin_lock(&flctx->flc_lock); |
1144 | list_for_each(el, &flctx->flc_posix) { | ||
1145 | count++; | ||
1148 | } | 1146 | } |
1149 | spin_unlock(&inode->i_lock); | 1147 | spin_unlock(&flctx->flc_lock); |
1150 | 1148 | ||
1151 | INIT_LIST_HEAD(&locks_to_send); | 1149 | INIT_LIST_HEAD(&locks_to_send); |
1152 | 1150 | ||
@@ -1155,7 +1153,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
1155 | * added to the list while we are holding cinode->lock_sem that | 1153 | * added to the list while we are holding cinode->lock_sem that |
1156 | * protects locking operations of this inode. | 1154 | * protects locking operations of this inode. |
1157 | */ | 1155 | */ |
1158 | for (; i < count; i++) { | 1156 | for (i = 0; i < count; i++) { |
1159 | lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); | 1157 | lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); |
1160 | if (!lck) { | 1158 | if (!lck) { |
1161 | rc = -ENOMEM; | 1159 | rc = -ENOMEM; |
@@ -1165,11 +1163,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
1165 | } | 1163 | } |
1166 | 1164 | ||
1167 | el = locks_to_send.next; | 1165 | el = locks_to_send.next; |
1168 | spin_lock(&inode->i_lock); | 1166 | spin_lock(&flctx->flc_lock); |
1169 | cifs_for_each_lock(inode, before) { | 1167 | list_for_each_entry(flock, &flctx->flc_posix, fl_list) { |
1170 | flock = *before; | ||
1171 | if ((flock->fl_flags & FL_POSIX) == 0) | ||
1172 | continue; | ||
1173 | if (el == &locks_to_send) { | 1168 | if (el == &locks_to_send) { |
1174 | /* | 1169 | /* |
1175 | * The list ended. We don't have enough allocated | 1170 | * The list ended. We don't have enough allocated |
@@ -1189,9 +1184,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
1189 | lck->length = length; | 1184 | lck->length = length; |
1190 | lck->type = type; | 1185 | lck->type = type; |
1191 | lck->offset = flock->fl_start; | 1186 | lck->offset = flock->fl_start; |
1192 | el = el->next; | ||
1193 | } | 1187 | } |
1194 | spin_unlock(&inode->i_lock); | 1188 | spin_unlock(&flctx->flc_lock); |
1195 | 1189 | ||
1196 | list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { | 1190 | list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { |
1197 | int stored_rc; | 1191 | int stored_rc; |
@@ -3248,7 +3242,6 @@ static struct vm_operations_struct cifs_file_vm_ops = { | |||
3248 | .fault = filemap_fault, | 3242 | .fault = filemap_fault, |
3249 | .map_pages = filemap_map_pages, | 3243 | .map_pages = filemap_map_pages, |
3250 | .page_mkwrite = cifs_page_mkwrite, | 3244 | .page_mkwrite = cifs_page_mkwrite, |
3251 | .remap_pages = generic_file_remap_pages, | ||
3252 | }; | 3245 | }; |
3253 | 3246 | ||
3254 | int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) | 3247 | int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 0c3ce464cae4..2d4f37235ed0 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -937,8 +937,6 @@ retry_iget5_locked: | |||
937 | inode->i_flags |= S_NOATIME | S_NOCMTIME; | 937 | inode->i_flags |= S_NOATIME | S_NOCMTIME; |
938 | if (inode->i_state & I_NEW) { | 938 | if (inode->i_state & I_NEW) { |
939 | inode->i_ino = hash; | 939 | inode->i_ino = hash; |
940 | if (S_ISREG(inode->i_mode)) | ||
941 | inode->i_data.backing_dev_info = sb->s_bdi; | ||
942 | #ifdef CONFIG_CIFS_FSCACHE | 940 | #ifdef CONFIG_CIFS_FSCACHE |
943 | /* initialize per-inode cache cookie pointer */ | 941 | /* initialize per-inode cache cookie pointer */ |
944 | CIFS_I(inode)->fscache = NULL; | 942 | CIFS_I(inode)->fscache = NULL; |
diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 86c893884eb9..281ee011bb6a 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c | |||
@@ -28,29 +28,6 @@ | |||
28 | 28 | ||
29 | #include "coda_int.h" | 29 | #include "coda_int.h" |
30 | 30 | ||
31 | /* dir inode-ops */ | ||
32 | static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, bool excl); | ||
33 | static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, unsigned int flags); | ||
34 | static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, | ||
35 | struct dentry *entry); | ||
36 | static int coda_unlink(struct inode *dir_inode, struct dentry *entry); | ||
37 | static int coda_symlink(struct inode *dir_inode, struct dentry *entry, | ||
38 | const char *symname); | ||
39 | static int coda_mkdir(struct inode *dir_inode, struct dentry *entry, umode_t mode); | ||
40 | static int coda_rmdir(struct inode *dir_inode, struct dentry *entry); | ||
41 | static int coda_rename(struct inode *old_inode, struct dentry *old_dentry, | ||
42 | struct inode *new_inode, struct dentry *new_dentry); | ||
43 | |||
44 | /* dir file-ops */ | ||
45 | static int coda_readdir(struct file *file, struct dir_context *ctx); | ||
46 | |||
47 | /* dentry ops */ | ||
48 | static int coda_dentry_revalidate(struct dentry *de, unsigned int flags); | ||
49 | static int coda_dentry_delete(const struct dentry *); | ||
50 | |||
51 | /* support routines */ | ||
52 | static int coda_venus_readdir(struct file *, struct dir_context *); | ||
53 | |||
54 | /* same as fs/bad_inode.c */ | 31 | /* same as fs/bad_inode.c */ |
55 | static int coda_return_EIO(void) | 32 | static int coda_return_EIO(void) |
56 | { | 33 | { |
@@ -58,38 +35,6 @@ static int coda_return_EIO(void) | |||
58 | } | 35 | } |
59 | #define CODA_EIO_ERROR ((void *) (coda_return_EIO)) | 36 | #define CODA_EIO_ERROR ((void *) (coda_return_EIO)) |
60 | 37 | ||
61 | const struct dentry_operations coda_dentry_operations = | ||
62 | { | ||
63 | .d_revalidate = coda_dentry_revalidate, | ||
64 | .d_delete = coda_dentry_delete, | ||
65 | }; | ||
66 | |||
67 | const struct inode_operations coda_dir_inode_operations = | ||
68 | { | ||
69 | .create = coda_create, | ||
70 | .lookup = coda_lookup, | ||
71 | .link = coda_link, | ||
72 | .unlink = coda_unlink, | ||
73 | .symlink = coda_symlink, | ||
74 | .mkdir = coda_mkdir, | ||
75 | .rmdir = coda_rmdir, | ||
76 | .mknod = CODA_EIO_ERROR, | ||
77 | .rename = coda_rename, | ||
78 | .permission = coda_permission, | ||
79 | .getattr = coda_getattr, | ||
80 | .setattr = coda_setattr, | ||
81 | }; | ||
82 | |||
83 | const struct file_operations coda_dir_operations = { | ||
84 | .llseek = generic_file_llseek, | ||
85 | .read = generic_read_dir, | ||
86 | .iterate = coda_readdir, | ||
87 | .open = coda_open, | ||
88 | .release = coda_release, | ||
89 | .fsync = coda_fsync, | ||
90 | }; | ||
91 | |||
92 | |||
93 | /* inode operations for directories */ | 38 | /* inode operations for directories */ |
94 | /* access routines: lookup, readlink, permission */ | 39 | /* access routines: lookup, readlink, permission */ |
95 | static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsigned int flags) | 40 | static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsigned int flags) |
@@ -374,33 +319,6 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
374 | return error; | 319 | return error; |
375 | } | 320 | } |
376 | 321 | ||
377 | |||
378 | /* file operations for directories */ | ||
379 | static int coda_readdir(struct file *coda_file, struct dir_context *ctx) | ||
380 | { | ||
381 | struct coda_file_info *cfi; | ||
382 | struct file *host_file; | ||
383 | int ret; | ||
384 | |||
385 | cfi = CODA_FTOC(coda_file); | ||
386 | BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); | ||
387 | host_file = cfi->cfi_container; | ||
388 | |||
389 | if (host_file->f_op->iterate) { | ||
390 | struct inode *host_inode = file_inode(host_file); | ||
391 | mutex_lock(&host_inode->i_mutex); | ||
392 | ret = -ENOENT; | ||
393 | if (!IS_DEADDIR(host_inode)) { | ||
394 | ret = host_file->f_op->iterate(host_file, ctx); | ||
395 | file_accessed(host_file); | ||
396 | } | ||
397 | mutex_unlock(&host_inode->i_mutex); | ||
398 | return ret; | ||
399 | } | ||
400 | /* Venus: we must read Venus dirents from a file */ | ||
401 | return coda_venus_readdir(coda_file, ctx); | ||
402 | } | ||
403 | |||
404 | static inline unsigned int CDT2DT(unsigned char cdt) | 322 | static inline unsigned int CDT2DT(unsigned char cdt) |
405 | { | 323 | { |
406 | unsigned int dt; | 324 | unsigned int dt; |
@@ -495,6 +413,33 @@ out: | |||
495 | return 0; | 413 | return 0; |
496 | } | 414 | } |
497 | 415 | ||
416 | /* file operations for directories */ | ||
417 | static int coda_readdir(struct file *coda_file, struct dir_context *ctx) | ||
418 | { | ||
419 | struct coda_file_info *cfi; | ||
420 | struct file *host_file; | ||
421 | int ret; | ||
422 | |||
423 | cfi = CODA_FTOC(coda_file); | ||
424 | BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); | ||
425 | host_file = cfi->cfi_container; | ||
426 | |||
427 | if (host_file->f_op->iterate) { | ||
428 | struct inode *host_inode = file_inode(host_file); | ||
429 | |||
430 | mutex_lock(&host_inode->i_mutex); | ||
431 | ret = -ENOENT; | ||
432 | if (!IS_DEADDIR(host_inode)) { | ||
433 | ret = host_file->f_op->iterate(host_file, ctx); | ||
434 | file_accessed(host_file); | ||
435 | } | ||
436 | mutex_unlock(&host_inode->i_mutex); | ||
437 | return ret; | ||
438 | } | ||
439 | /* Venus: we must read Venus dirents from a file */ | ||
440 | return coda_venus_readdir(coda_file, ctx); | ||
441 | } | ||
442 | |||
498 | /* called when a cache lookup succeeds */ | 443 | /* called when a cache lookup succeeds */ |
499 | static int coda_dentry_revalidate(struct dentry *de, unsigned int flags) | 444 | static int coda_dentry_revalidate(struct dentry *de, unsigned int flags) |
500 | { | 445 | { |
@@ -603,3 +548,32 @@ int coda_revalidate_inode(struct inode *inode) | |||
603 | } | 548 | } |
604 | return 0; | 549 | return 0; |
605 | } | 550 | } |
551 | |||
552 | const struct dentry_operations coda_dentry_operations = { | ||
553 | .d_revalidate = coda_dentry_revalidate, | ||
554 | .d_delete = coda_dentry_delete, | ||
555 | }; | ||
556 | |||
557 | const struct inode_operations coda_dir_inode_operations = { | ||
558 | .create = coda_create, | ||
559 | .lookup = coda_lookup, | ||
560 | .link = coda_link, | ||
561 | .unlink = coda_unlink, | ||
562 | .symlink = coda_symlink, | ||
563 | .mkdir = coda_mkdir, | ||
564 | .rmdir = coda_rmdir, | ||
565 | .mknod = CODA_EIO_ERROR, | ||
566 | .rename = coda_rename, | ||
567 | .permission = coda_permission, | ||
568 | .getattr = coda_getattr, | ||
569 | .setattr = coda_setattr, | ||
570 | }; | ||
571 | |||
572 | const struct file_operations coda_dir_operations = { | ||
573 | .llseek = generic_file_llseek, | ||
574 | .read = generic_read_dir, | ||
575 | .iterate = coda_readdir, | ||
576 | .open = coda_open, | ||
577 | .release = coda_release, | ||
578 | .fsync = coda_fsync, | ||
579 | }; | ||
diff --git a/fs/coda/inode.c b/fs/coda/inode.c index b945410bfcd5..82ec68b59208 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c | |||
@@ -183,7 +183,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) | |||
183 | goto unlock_out; | 183 | goto unlock_out; |
184 | } | 184 | } |
185 | 185 | ||
186 | error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY); | 186 | error = bdi_setup_and_register(&vc->bdi, "coda"); |
187 | if (error) | 187 | if (error) |
188 | goto unlock_out; | 188 | goto unlock_out; |
189 | 189 | ||
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index bd4a3c167091..a315677e44d3 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h | |||
@@ -70,8 +70,6 @@ extern int configfs_is_root(struct config_item *item); | |||
70 | 70 | ||
71 | extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *, struct super_block *); | 71 | extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *, struct super_block *); |
72 | extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *)); | 72 | extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *)); |
73 | extern int configfs_inode_init(void); | ||
74 | extern void configfs_inode_exit(void); | ||
75 | 73 | ||
76 | extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); | 74 | extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); |
77 | extern int configfs_make_dirent(struct configfs_dirent *, | 75 | extern int configfs_make_dirent(struct configfs_dirent *, |
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 5946ad98053f..65af86147154 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c | |||
@@ -50,12 +50,6 @@ static const struct address_space_operations configfs_aops = { | |||
50 | .write_end = simple_write_end, | 50 | .write_end = simple_write_end, |
51 | }; | 51 | }; |
52 | 52 | ||
53 | static struct backing_dev_info configfs_backing_dev_info = { | ||
54 | .name = "configfs", | ||
55 | .ra_pages = 0, /* No readahead */ | ||
56 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | ||
57 | }; | ||
58 | |||
59 | static const struct inode_operations configfs_inode_operations ={ | 53 | static const struct inode_operations configfs_inode_operations ={ |
60 | .setattr = configfs_setattr, | 54 | .setattr = configfs_setattr, |
61 | }; | 55 | }; |
@@ -137,7 +131,6 @@ struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent *sd, | |||
137 | if (inode) { | 131 | if (inode) { |
138 | inode->i_ino = get_next_ino(); | 132 | inode->i_ino = get_next_ino(); |
139 | inode->i_mapping->a_ops = &configfs_aops; | 133 | inode->i_mapping->a_ops = &configfs_aops; |
140 | inode->i_mapping->backing_dev_info = &configfs_backing_dev_info; | ||
141 | inode->i_op = &configfs_inode_operations; | 134 | inode->i_op = &configfs_inode_operations; |
142 | 135 | ||
143 | if (sd->s_iattr) { | 136 | if (sd->s_iattr) { |
@@ -283,13 +276,3 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name) | |||
283 | } | 276 | } |
284 | mutex_unlock(&dir->d_inode->i_mutex); | 277 | mutex_unlock(&dir->d_inode->i_mutex); |
285 | } | 278 | } |
286 | |||
287 | int __init configfs_inode_init(void) | ||
288 | { | ||
289 | return bdi_init(&configfs_backing_dev_info); | ||
290 | } | ||
291 | |||
292 | void configfs_inode_exit(void) | ||
293 | { | ||
294 | bdi_destroy(&configfs_backing_dev_info); | ||
295 | } | ||
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index f6c285833390..da94e41bdbf6 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c | |||
@@ -145,19 +145,13 @@ static int __init configfs_init(void) | |||
145 | if (!config_kobj) | 145 | if (!config_kobj) |
146 | goto out2; | 146 | goto out2; |
147 | 147 | ||
148 | err = configfs_inode_init(); | ||
149 | if (err) | ||
150 | goto out3; | ||
151 | |||
152 | err = register_filesystem(&configfs_fs_type); | 148 | err = register_filesystem(&configfs_fs_type); |
153 | if (err) | 149 | if (err) |
154 | goto out4; | 150 | goto out3; |
155 | 151 | ||
156 | return 0; | 152 | return 0; |
157 | out4: | ||
158 | pr_err("Unable to register filesystem!\n"); | ||
159 | configfs_inode_exit(); | ||
160 | out3: | 153 | out3: |
154 | pr_err("Unable to register filesystem!\n"); | ||
161 | kobject_put(config_kobj); | 155 | kobject_put(config_kobj); |
162 | out2: | 156 | out2: |
163 | kmem_cache_destroy(configfs_dir_cachep); | 157 | kmem_cache_destroy(configfs_dir_cachep); |
@@ -172,7 +166,6 @@ static void __exit configfs_exit(void) | |||
172 | kobject_put(config_kobj); | 166 | kobject_put(config_kobj); |
173 | kmem_cache_destroy(configfs_dir_cachep); | 167 | kmem_cache_destroy(configfs_dir_cachep); |
174 | configfs_dir_cachep = NULL; | 168 | configfs_dir_cachep = NULL; |
175 | configfs_inode_exit(); | ||
176 | } | 169 | } |
177 | 170 | ||
178 | MODULE_AUTHOR("Oracle"); | 171 | MODULE_AUTHOR("Oracle"); |
diff --git a/fs/dax.c b/fs/dax.c new file mode 100644 index 000000000000..ed1619ec6537 --- /dev/null +++ b/fs/dax.c | |||
@@ -0,0 +1,534 @@ | |||
1 | /* | ||
2 | * fs/dax.c - Direct Access filesystem code | ||
3 | * Copyright (c) 2013-2014 Intel Corporation | ||
4 | * Author: Matthew Wilcox <matthew.r.wilcox@intel.com> | ||
5 | * Author: Ross Zwisler <ross.zwisler@linux.intel.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms and conditions of the GNU General Public License, | ||
9 | * version 2, as published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
14 | * more details. | ||
15 | */ | ||
16 | |||
17 | #include <linux/atomic.h> | ||
18 | #include <linux/blkdev.h> | ||
19 | #include <linux/buffer_head.h> | ||
20 | #include <linux/fs.h> | ||
21 | #include <linux/genhd.h> | ||
22 | #include <linux/highmem.h> | ||
23 | #include <linux/memcontrol.h> | ||
24 | #include <linux/mm.h> | ||
25 | #include <linux/mutex.h> | ||
26 | #include <linux/sched.h> | ||
27 | #include <linux/uio.h> | ||
28 | #include <linux/vmstat.h> | ||
29 | |||
30 | int dax_clear_blocks(struct inode *inode, sector_t block, long size) | ||
31 | { | ||
32 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
33 | sector_t sector = block << (inode->i_blkbits - 9); | ||
34 | |||
35 | might_sleep(); | ||
36 | do { | ||
37 | void *addr; | ||
38 | unsigned long pfn; | ||
39 | long count; | ||
40 | |||
41 | count = bdev_direct_access(bdev, sector, &addr, &pfn, size); | ||
42 | if (count < 0) | ||
43 | return count; | ||
44 | BUG_ON(size < count); | ||
45 | while (count > 0) { | ||
46 | unsigned pgsz = PAGE_SIZE - offset_in_page(addr); | ||
47 | if (pgsz > count) | ||
48 | pgsz = count; | ||
49 | if (pgsz < PAGE_SIZE) | ||
50 | memset(addr, 0, pgsz); | ||
51 | else | ||
52 | clear_page(addr); | ||
53 | addr += pgsz; | ||
54 | size -= pgsz; | ||
55 | count -= pgsz; | ||
56 | BUG_ON(pgsz & 511); | ||
57 | sector += pgsz / 512; | ||
58 | cond_resched(); | ||
59 | } | ||
60 | } while (size); | ||
61 | |||
62 | return 0; | ||
63 | } | ||
64 | EXPORT_SYMBOL_GPL(dax_clear_blocks); | ||
65 | |||
66 | static long dax_get_addr(struct buffer_head *bh, void **addr, unsigned blkbits) | ||
67 | { | ||
68 | unsigned long pfn; | ||
69 | sector_t sector = bh->b_blocknr << (blkbits - 9); | ||
70 | return bdev_direct_access(bh->b_bdev, sector, addr, &pfn, bh->b_size); | ||
71 | } | ||
72 | |||
73 | static void dax_new_buf(void *addr, unsigned size, unsigned first, loff_t pos, | ||
74 | loff_t end) | ||
75 | { | ||
76 | loff_t final = end - pos + first; /* The final byte of the buffer */ | ||
77 | |||
78 | if (first > 0) | ||
79 | memset(addr, 0, first); | ||
80 | if (final < size) | ||
81 | memset(addr + final, 0, size - final); | ||
82 | } | ||
83 | |||
84 | static bool buffer_written(struct buffer_head *bh) | ||
85 | { | ||
86 | return buffer_mapped(bh) && !buffer_unwritten(bh); | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * When ext4 encounters a hole, it returns without modifying the buffer_head | ||
91 | * which means that we can't trust b_size. To cope with this, we set b_state | ||
92 | * to 0 before calling get_block and, if any bit is set, we know we can trust | ||
93 | * b_size. Unfortunate, really, since ext4 knows precisely how long a hole is | ||
94 | * and would save us time calling get_block repeatedly. | ||
95 | */ | ||
96 | static bool buffer_size_valid(struct buffer_head *bh) | ||
97 | { | ||
98 | return bh->b_state != 0; | ||
99 | } | ||
100 | |||
101 | static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, | ||
102 | loff_t start, loff_t end, get_block_t get_block, | ||
103 | struct buffer_head *bh) | ||
104 | { | ||
105 | ssize_t retval = 0; | ||
106 | loff_t pos = start; | ||
107 | loff_t max = start; | ||
108 | loff_t bh_max = start; | ||
109 | void *addr; | ||
110 | bool hole = false; | ||
111 | |||
112 | if (rw != WRITE) | ||
113 | end = min(end, i_size_read(inode)); | ||
114 | |||
115 | while (pos < end) { | ||
116 | unsigned len; | ||
117 | if (pos == max) { | ||
118 | unsigned blkbits = inode->i_blkbits; | ||
119 | sector_t block = pos >> blkbits; | ||
120 | unsigned first = pos - (block << blkbits); | ||
121 | long size; | ||
122 | |||
123 | if (pos == bh_max) { | ||
124 | bh->b_size = PAGE_ALIGN(end - pos); | ||
125 | bh->b_state = 0; | ||
126 | retval = get_block(inode, block, bh, | ||
127 | rw == WRITE); | ||
128 | if (retval) | ||
129 | break; | ||
130 | if (!buffer_size_valid(bh)) | ||
131 | bh->b_size = 1 << blkbits; | ||
132 | bh_max = pos - first + bh->b_size; | ||
133 | } else { | ||
134 | unsigned done = bh->b_size - | ||
135 | (bh_max - (pos - first)); | ||
136 | bh->b_blocknr += done >> blkbits; | ||
137 | bh->b_size -= done; | ||
138 | } | ||
139 | |||
140 | hole = (rw != WRITE) && !buffer_written(bh); | ||
141 | if (hole) { | ||
142 | addr = NULL; | ||
143 | size = bh->b_size - first; | ||
144 | } else { | ||
145 | retval = dax_get_addr(bh, &addr, blkbits); | ||
146 | if (retval < 0) | ||
147 | break; | ||
148 | if (buffer_unwritten(bh) || buffer_new(bh)) | ||
149 | dax_new_buf(addr, retval, first, pos, | ||
150 | end); | ||
151 | addr += first; | ||
152 | size = retval - first; | ||
153 | } | ||
154 | max = min(pos + size, end); | ||
155 | } | ||
156 | |||
157 | if (rw == WRITE) | ||
158 | len = copy_from_iter(addr, max - pos, iter); | ||
159 | else if (!hole) | ||
160 | len = copy_to_iter(addr, max - pos, iter); | ||
161 | else | ||
162 | len = iov_iter_zero(max - pos, iter); | ||
163 | |||
164 | if (!len) | ||
165 | break; | ||
166 | |||
167 | pos += len; | ||
168 | addr += len; | ||
169 | } | ||
170 | |||
171 | return (pos == start) ? retval : pos - start; | ||
172 | } | ||
173 | |||
174 | /** | ||
175 | * dax_do_io - Perform I/O to a DAX file | ||
176 | * @rw: READ to read or WRITE to write | ||
177 | * @iocb: The control block for this I/O | ||
178 | * @inode: The file which the I/O is directed at | ||
179 | * @iter: The addresses to do I/O from or to | ||
180 | * @pos: The file offset where the I/O starts | ||
181 | * @get_block: The filesystem method used to translate file offsets to blocks | ||
182 | * @end_io: A filesystem callback for I/O completion | ||
183 | * @flags: See below | ||
184 | * | ||
185 | * This function uses the same locking scheme as do_blockdev_direct_IO: | ||
186 | * If @flags has DIO_LOCKING set, we assume that the i_mutex is held by the | ||
187 | * caller for writes. For reads, we take and release the i_mutex ourselves. | ||
188 | * If DIO_LOCKING is not set, the filesystem takes care of its own locking. | ||
189 | * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O | ||
190 | * is in progress. | ||
191 | */ | ||
192 | ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode, | ||
193 | struct iov_iter *iter, loff_t pos, | ||
194 | get_block_t get_block, dio_iodone_t end_io, int flags) | ||
195 | { | ||
196 | struct buffer_head bh; | ||
197 | ssize_t retval = -EINVAL; | ||
198 | loff_t end = pos + iov_iter_count(iter); | ||
199 | |||
200 | memset(&bh, 0, sizeof(bh)); | ||
201 | |||
202 | if ((flags & DIO_LOCKING) && (rw == READ)) { | ||
203 | struct address_space *mapping = inode->i_mapping; | ||
204 | mutex_lock(&inode->i_mutex); | ||
205 | retval = filemap_write_and_wait_range(mapping, pos, end - 1); | ||
206 | if (retval) { | ||
207 | mutex_unlock(&inode->i_mutex); | ||
208 | goto out; | ||
209 | } | ||
210 | } | ||
211 | |||
212 | /* Protects against truncate */ | ||
213 | atomic_inc(&inode->i_dio_count); | ||
214 | |||
215 | retval = dax_io(rw, inode, iter, pos, end, get_block, &bh); | ||
216 | |||
217 | if ((flags & DIO_LOCKING) && (rw == READ)) | ||
218 | mutex_unlock(&inode->i_mutex); | ||
219 | |||
220 | if ((retval > 0) && end_io) | ||
221 | end_io(iocb, pos, retval, bh.b_private); | ||
222 | |||
223 | inode_dio_done(inode); | ||
224 | out: | ||
225 | return retval; | ||
226 | } | ||
227 | EXPORT_SYMBOL_GPL(dax_do_io); | ||
228 | |||
229 | /* | ||
230 | * The user has performed a load from a hole in the file. Allocating | ||
231 | * a new page in the file would cause excessive storage usage for | ||
232 | * workloads with sparse files. We allocate a page cache page instead. | ||
233 | * We'll kick it out of the page cache if it's ever written to, | ||
234 | * otherwise it will simply fall out of the page cache under memory | ||
235 | * pressure without ever having been dirtied. | ||
236 | */ | ||
237 | static int dax_load_hole(struct address_space *mapping, struct page *page, | ||
238 | struct vm_fault *vmf) | ||
239 | { | ||
240 | unsigned long size; | ||
241 | struct inode *inode = mapping->host; | ||
242 | if (!page) | ||
243 | page = find_or_create_page(mapping, vmf->pgoff, | ||
244 | GFP_KERNEL | __GFP_ZERO); | ||
245 | if (!page) | ||
246 | return VM_FAULT_OOM; | ||
247 | /* Recheck i_size under page lock to avoid truncate race */ | ||
248 | size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
249 | if (vmf->pgoff >= size) { | ||
250 | unlock_page(page); | ||
251 | page_cache_release(page); | ||
252 | return VM_FAULT_SIGBUS; | ||
253 | } | ||
254 | |||
255 | vmf->page = page; | ||
256 | return VM_FAULT_LOCKED; | ||
257 | } | ||
258 | |||
259 | static int copy_user_bh(struct page *to, struct buffer_head *bh, | ||
260 | unsigned blkbits, unsigned long vaddr) | ||
261 | { | ||
262 | void *vfrom, *vto; | ||
263 | if (dax_get_addr(bh, &vfrom, blkbits) < 0) | ||
264 | return -EIO; | ||
265 | vto = kmap_atomic(to); | ||
266 | copy_user_page(vto, vfrom, vaddr, to); | ||
267 | kunmap_atomic(vto); | ||
268 | return 0; | ||
269 | } | ||
270 | |||
271 | static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, | ||
272 | struct vm_area_struct *vma, struct vm_fault *vmf) | ||
273 | { | ||
274 | struct address_space *mapping = inode->i_mapping; | ||
275 | sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9); | ||
276 | unsigned long vaddr = (unsigned long)vmf->virtual_address; | ||
277 | void *addr; | ||
278 | unsigned long pfn; | ||
279 | pgoff_t size; | ||
280 | int error; | ||
281 | |||
282 | i_mmap_lock_read(mapping); | ||
283 | |||
284 | /* | ||
285 | * Check truncate didn't happen while we were allocating a block. | ||
286 | * If it did, this block may or may not be still allocated to the | ||
287 | * file. We can't tell the filesystem to free it because we can't | ||
288 | * take i_mutex here. In the worst case, the file still has blocks | ||
289 | * allocated past the end of the file. | ||
290 | */ | ||
291 | size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
292 | if (unlikely(vmf->pgoff >= size)) { | ||
293 | error = -EIO; | ||
294 | goto out; | ||
295 | } | ||
296 | |||
297 | error = bdev_direct_access(bh->b_bdev, sector, &addr, &pfn, bh->b_size); | ||
298 | if (error < 0) | ||
299 | goto out; | ||
300 | if (error < PAGE_SIZE) { | ||
301 | error = -EIO; | ||
302 | goto out; | ||
303 | } | ||
304 | |||
305 | if (buffer_unwritten(bh) || buffer_new(bh)) | ||
306 | clear_page(addr); | ||
307 | |||
308 | error = vm_insert_mixed(vma, vaddr, pfn); | ||
309 | |||
310 | out: | ||
311 | i_mmap_unlock_read(mapping); | ||
312 | |||
313 | if (bh->b_end_io) | ||
314 | bh->b_end_io(bh, 1); | ||
315 | |||
316 | return error; | ||
317 | } | ||
318 | |||
319 | static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, | ||
320 | get_block_t get_block) | ||
321 | { | ||
322 | struct file *file = vma->vm_file; | ||
323 | struct address_space *mapping = file->f_mapping; | ||
324 | struct inode *inode = mapping->host; | ||
325 | struct page *page; | ||
326 | struct buffer_head bh; | ||
327 | unsigned long vaddr = (unsigned long)vmf->virtual_address; | ||
328 | unsigned blkbits = inode->i_blkbits; | ||
329 | sector_t block; | ||
330 | pgoff_t size; | ||
331 | int error; | ||
332 | int major = 0; | ||
333 | |||
334 | size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
335 | if (vmf->pgoff >= size) | ||
336 | return VM_FAULT_SIGBUS; | ||
337 | |||
338 | memset(&bh, 0, sizeof(bh)); | ||
339 | block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits); | ||
340 | bh.b_size = PAGE_SIZE; | ||
341 | |||
342 | repeat: | ||
343 | page = find_get_page(mapping, vmf->pgoff); | ||
344 | if (page) { | ||
345 | if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) { | ||
346 | page_cache_release(page); | ||
347 | return VM_FAULT_RETRY; | ||
348 | } | ||
349 | if (unlikely(page->mapping != mapping)) { | ||
350 | unlock_page(page); | ||
351 | page_cache_release(page); | ||
352 | goto repeat; | ||
353 | } | ||
354 | size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
355 | if (unlikely(vmf->pgoff >= size)) { | ||
356 | /* | ||
357 | * We have a struct page covering a hole in the file | ||
358 | * from a read fault and we've raced with a truncate | ||
359 | */ | ||
360 | error = -EIO; | ||
361 | goto unlock_page; | ||
362 | } | ||
363 | } | ||
364 | |||
365 | error = get_block(inode, block, &bh, 0); | ||
366 | if (!error && (bh.b_size < PAGE_SIZE)) | ||
367 | error = -EIO; /* fs corruption? */ | ||
368 | if (error) | ||
369 | goto unlock_page; | ||
370 | |||
371 | if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) { | ||
372 | if (vmf->flags & FAULT_FLAG_WRITE) { | ||
373 | error = get_block(inode, block, &bh, 1); | ||
374 | count_vm_event(PGMAJFAULT); | ||
375 | mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); | ||
376 | major = VM_FAULT_MAJOR; | ||
377 | if (!error && (bh.b_size < PAGE_SIZE)) | ||
378 | error = -EIO; | ||
379 | if (error) | ||
380 | goto unlock_page; | ||
381 | } else { | ||
382 | return dax_load_hole(mapping, page, vmf); | ||
383 | } | ||
384 | } | ||
385 | |||
386 | if (vmf->cow_page) { | ||
387 | struct page *new_page = vmf->cow_page; | ||
388 | if (buffer_written(&bh)) | ||
389 | error = copy_user_bh(new_page, &bh, blkbits, vaddr); | ||
390 | else | ||
391 | clear_user_highpage(new_page, vaddr); | ||
392 | if (error) | ||
393 | goto unlock_page; | ||
394 | vmf->page = page; | ||
395 | if (!page) { | ||
396 | i_mmap_lock_read(mapping); | ||
397 | /* Check we didn't race with truncate */ | ||
398 | size = (i_size_read(inode) + PAGE_SIZE - 1) >> | ||
399 | PAGE_SHIFT; | ||
400 | if (vmf->pgoff >= size) { | ||
401 | i_mmap_unlock_read(mapping); | ||
402 | error = -EIO; | ||
403 | goto out; | ||
404 | } | ||
405 | } | ||
406 | return VM_FAULT_LOCKED; | ||
407 | } | ||
408 | |||
409 | /* Check we didn't race with a read fault installing a new page */ | ||
410 | if (!page && major) | ||
411 | page = find_lock_page(mapping, vmf->pgoff); | ||
412 | |||
413 | if (page) { | ||
414 | unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT, | ||
415 | PAGE_CACHE_SIZE, 0); | ||
416 | delete_from_page_cache(page); | ||
417 | unlock_page(page); | ||
418 | page_cache_release(page); | ||
419 | } | ||
420 | |||
421 | error = dax_insert_mapping(inode, &bh, vma, vmf); | ||
422 | |||
423 | out: | ||
424 | if (error == -ENOMEM) | ||
425 | return VM_FAULT_OOM | major; | ||
426 | /* -EBUSY is fine, somebody else faulted on the same PTE */ | ||
427 | if ((error < 0) && (error != -EBUSY)) | ||
428 | return VM_FAULT_SIGBUS | major; | ||
429 | return VM_FAULT_NOPAGE | major; | ||
430 | |||
431 | unlock_page: | ||
432 | if (page) { | ||
433 | unlock_page(page); | ||
434 | page_cache_release(page); | ||
435 | } | ||
436 | goto out; | ||
437 | } | ||
438 | |||
439 | /** | ||
440 | * dax_fault - handle a page fault on a DAX file | ||
441 | * @vma: The virtual memory area where the fault occurred | ||
442 | * @vmf: The description of the fault | ||
443 | * @get_block: The filesystem method used to translate file offsets to blocks | ||
444 | * | ||
445 | * When a page fault occurs, filesystems may call this helper in their | ||
446 | * fault handler for DAX files. | ||
447 | */ | ||
448 | int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, | ||
449 | get_block_t get_block) | ||
450 | { | ||
451 | int result; | ||
452 | struct super_block *sb = file_inode(vma->vm_file)->i_sb; | ||
453 | |||
454 | if (vmf->flags & FAULT_FLAG_WRITE) { | ||
455 | sb_start_pagefault(sb); | ||
456 | file_update_time(vma->vm_file); | ||
457 | } | ||
458 | result = do_dax_fault(vma, vmf, get_block); | ||
459 | if (vmf->flags & FAULT_FLAG_WRITE) | ||
460 | sb_end_pagefault(sb); | ||
461 | |||
462 | return result; | ||
463 | } | ||
464 | EXPORT_SYMBOL_GPL(dax_fault); | ||
465 | |||
466 | /** | ||
467 | * dax_zero_page_range - zero a range within a page of a DAX file | ||
468 | * @inode: The file being truncated | ||
469 | * @from: The file offset that is being truncated to | ||
470 | * @length: The number of bytes to zero | ||
471 | * @get_block: The filesystem method used to translate file offsets to blocks | ||
472 | * | ||
473 | * This function can be called by a filesystem when it is zeroing part of a | ||
474 | * page in a DAX file. This is intended for hole-punch operations. If | ||
475 | * you are truncating a file, the helper function dax_truncate_page() may be | ||
476 | * more convenient. | ||
477 | * | ||
478 | * We work in terms of PAGE_CACHE_SIZE here for commonality with | ||
479 | * block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem | ||
480 | * took care of disposing of the unnecessary blocks. Even if the filesystem | ||
481 | * block size is smaller than PAGE_SIZE, we have to zero the rest of the page | ||
482 | * since the file might be mmapped. | ||
483 | */ | ||
484 | int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length, | ||
485 | get_block_t get_block) | ||
486 | { | ||
487 | struct buffer_head bh; | ||
488 | pgoff_t index = from >> PAGE_CACHE_SHIFT; | ||
489 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
490 | int err; | ||
491 | |||
492 | /* Block boundary? Nothing to do */ | ||
493 | if (!length) | ||
494 | return 0; | ||
495 | BUG_ON((offset + length) > PAGE_CACHE_SIZE); | ||
496 | |||
497 | memset(&bh, 0, sizeof(bh)); | ||
498 | bh.b_size = PAGE_CACHE_SIZE; | ||
499 | err = get_block(inode, index, &bh, 0); | ||
500 | if (err < 0) | ||
501 | return err; | ||
502 | if (buffer_written(&bh)) { | ||
503 | void *addr; | ||
504 | err = dax_get_addr(&bh, &addr, inode->i_blkbits); | ||
505 | if (err < 0) | ||
506 | return err; | ||
507 | memset(addr + offset, 0, length); | ||
508 | } | ||
509 | |||
510 | return 0; | ||
511 | } | ||
512 | EXPORT_SYMBOL_GPL(dax_zero_page_range); | ||
513 | |||
514 | /** | ||
515 | * dax_truncate_page - handle a partial page being truncated in a DAX file | ||
516 | * @inode: The file being truncated | ||
517 | * @from: The file offset that is being truncated to | ||
518 | * @get_block: The filesystem method used to translate file offsets to blocks | ||
519 | * | ||
520 | * Similar to block_truncate_page(), this function can be called by a | ||
521 | * filesystem when it is truncating a DAX file to handle the partial page. | ||
522 | * | ||
523 | * We work in terms of PAGE_CACHE_SIZE here for commonality with | ||
524 | * block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem | ||
525 | * took care of disposing of the unnecessary blocks. Even if the filesystem | ||
526 | * block size is smaller than PAGE_SIZE, we have to zero the rest of the page | ||
527 | * since the file might be mmapped. | ||
528 | */ | ||
529 | int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block) | ||
530 | { | ||
531 | unsigned length = PAGE_CACHE_ALIGN(from) - from; | ||
532 | return dax_zero_page_range(inode, from, length, get_block); | ||
533 | } | ||
534 | EXPORT_SYMBOL_GPL(dax_truncate_page); | ||
diff --git a/fs/dcache.c b/fs/dcache.c index e368d4f412f9..dc400fd29f4d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -38,6 +38,8 @@ | |||
38 | #include <linux/prefetch.h> | 38 | #include <linux/prefetch.h> |
39 | #include <linux/ratelimit.h> | 39 | #include <linux/ratelimit.h> |
40 | #include <linux/list_lru.h> | 40 | #include <linux/list_lru.h> |
41 | #include <linux/kasan.h> | ||
42 | |||
41 | #include "internal.h" | 43 | #include "internal.h" |
42 | #include "mount.h" | 44 | #include "mount.h" |
43 | 45 | ||
@@ -400,19 +402,20 @@ static void d_shrink_add(struct dentry *dentry, struct list_head *list) | |||
400 | * LRU lists entirely, while shrink_move moves it to the indicated | 402 | * LRU lists entirely, while shrink_move moves it to the indicated |
401 | * private list. | 403 | * private list. |
402 | */ | 404 | */ |
403 | static void d_lru_isolate(struct dentry *dentry) | 405 | static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry) |
404 | { | 406 | { |
405 | D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); | 407 | D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); |
406 | dentry->d_flags &= ~DCACHE_LRU_LIST; | 408 | dentry->d_flags &= ~DCACHE_LRU_LIST; |
407 | this_cpu_dec(nr_dentry_unused); | 409 | this_cpu_dec(nr_dentry_unused); |
408 | list_del_init(&dentry->d_lru); | 410 | list_lru_isolate(lru, &dentry->d_lru); |
409 | } | 411 | } |
410 | 412 | ||
411 | static void d_lru_shrink_move(struct dentry *dentry, struct list_head *list) | 413 | static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry, |
414 | struct list_head *list) | ||
412 | { | 415 | { |
413 | D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); | 416 | D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); |
414 | dentry->d_flags |= DCACHE_SHRINK_LIST; | 417 | dentry->d_flags |= DCACHE_SHRINK_LIST; |
415 | list_move_tail(&dentry->d_lru, list); | 418 | list_lru_isolate_move(lru, &dentry->d_lru, list); |
416 | } | 419 | } |
417 | 420 | ||
418 | /* | 421 | /* |
@@ -508,7 +511,7 @@ static void __dentry_kill(struct dentry *dentry) | |||
508 | * dentry_iput drops the locks, at which point nobody (except | 511 | * dentry_iput drops the locks, at which point nobody (except |
509 | * transient RCU lookups) can reach this dentry. | 512 | * transient RCU lookups) can reach this dentry. |
510 | */ | 513 | */ |
511 | BUG_ON((int)dentry->d_lockref.count > 0); | 514 | BUG_ON(dentry->d_lockref.count > 0); |
512 | this_cpu_dec(nr_dentry); | 515 | this_cpu_dec(nr_dentry); |
513 | if (dentry->d_op && dentry->d_op->d_release) | 516 | if (dentry->d_op && dentry->d_op->d_release) |
514 | dentry->d_op->d_release(dentry); | 517 | dentry->d_op->d_release(dentry); |
@@ -561,7 +564,7 @@ static inline struct dentry *lock_parent(struct dentry *dentry) | |||
561 | struct dentry *parent = dentry->d_parent; | 564 | struct dentry *parent = dentry->d_parent; |
562 | if (IS_ROOT(dentry)) | 565 | if (IS_ROOT(dentry)) |
563 | return NULL; | 566 | return NULL; |
564 | if (unlikely((int)dentry->d_lockref.count < 0)) | 567 | if (unlikely(dentry->d_lockref.count < 0)) |
565 | return NULL; | 568 | return NULL; |
566 | if (likely(spin_trylock(&parent->d_lock))) | 569 | if (likely(spin_trylock(&parent->d_lock))) |
567 | return parent; | 570 | return parent; |
@@ -590,6 +593,110 @@ again: | |||
590 | return parent; | 593 | return parent; |
591 | } | 594 | } |
592 | 595 | ||
596 | /* | ||
597 | * Try to do a lockless dput(), and return whether that was successful. | ||
598 | * | ||
599 | * If unsuccessful, we return false, having already taken the dentry lock. | ||
600 | * | ||
601 | * The caller needs to hold the RCU read lock, so that the dentry is | ||
602 | * guaranteed to stay around even if the refcount goes down to zero! | ||
603 | */ | ||
604 | static inline bool fast_dput(struct dentry *dentry) | ||
605 | { | ||
606 | int ret; | ||
607 | unsigned int d_flags; | ||
608 | |||
609 | /* | ||
610 | * If we have a d_op->d_delete() operation, we sould not | ||
611 | * let the dentry count go to zero, so use "put__or_lock". | ||
612 | */ | ||
613 | if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) | ||
614 | return lockref_put_or_lock(&dentry->d_lockref); | ||
615 | |||
616 | /* | ||
617 | * .. otherwise, we can try to just decrement the | ||
618 | * lockref optimistically. | ||
619 | */ | ||
620 | ret = lockref_put_return(&dentry->d_lockref); | ||
621 | |||
622 | /* | ||
623 | * If the lockref_put_return() failed due to the lock being held | ||
624 | * by somebody else, the fast path has failed. We will need to | ||
625 | * get the lock, and then check the count again. | ||
626 | */ | ||
627 | if (unlikely(ret < 0)) { | ||
628 | spin_lock(&dentry->d_lock); | ||
629 | if (dentry->d_lockref.count > 1) { | ||
630 | dentry->d_lockref.count--; | ||
631 | spin_unlock(&dentry->d_lock); | ||
632 | return 1; | ||
633 | } | ||
634 | return 0; | ||
635 | } | ||
636 | |||
637 | /* | ||
638 | * If we weren't the last ref, we're done. | ||
639 | */ | ||
640 | if (ret) | ||
641 | return 1; | ||
642 | |||
643 | /* | ||
644 | * Careful, careful. The reference count went down | ||
645 | * to zero, but we don't hold the dentry lock, so | ||
646 | * somebody else could get it again, and do another | ||
647 | * dput(), and we need to not race with that. | ||
648 | * | ||
649 | * However, there is a very special and common case | ||
650 | * where we don't care, because there is nothing to | ||
651 | * do: the dentry is still hashed, it does not have | ||
652 | * a 'delete' op, and it's referenced and already on | ||
653 | * the LRU list. | ||
654 | * | ||
655 | * NOTE! Since we aren't locked, these values are | ||
656 | * not "stable". However, it is sufficient that at | ||
657 | * some point after we dropped the reference the | ||
658 | * dentry was hashed and the flags had the proper | ||
659 | * value. Other dentry users may have re-gotten | ||
660 | * a reference to the dentry and change that, but | ||
661 | * our work is done - we can leave the dentry | ||
662 | * around with a zero refcount. | ||
663 | */ | ||
664 | smp_rmb(); | ||
665 | d_flags = ACCESS_ONCE(dentry->d_flags); | ||
666 | d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST; | ||
667 | |||
668 | /* Nothing to do? Dropping the reference was all we needed? */ | ||
669 | if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) | ||
670 | return 1; | ||
671 | |||
672 | /* | ||
673 | * Not the fast normal case? Get the lock. We've already decremented | ||
674 | * the refcount, but we'll need to re-check the situation after | ||
675 | * getting the lock. | ||
676 | */ | ||
677 | spin_lock(&dentry->d_lock); | ||
678 | |||
679 | /* | ||
680 | * Did somebody else grab a reference to it in the meantime, and | ||
681 | * we're no longer the last user after all? Alternatively, somebody | ||
682 | * else could have killed it and marked it dead. Either way, we | ||
683 | * don't need to do anything else. | ||
684 | */ | ||
685 | if (dentry->d_lockref.count) { | ||
686 | spin_unlock(&dentry->d_lock); | ||
687 | return 1; | ||
688 | } | ||
689 | |||
690 | /* | ||
691 | * Re-get the reference we optimistically dropped. We hold the | ||
692 | * lock, and we just tested that it was zero, so we can just | ||
693 | * set it to 1. | ||
694 | */ | ||
695 | dentry->d_lockref.count = 1; | ||
696 | return 0; | ||
697 | } | ||
698 | |||
699 | |||
593 | /* | 700 | /* |
594 | * This is dput | 701 | * This is dput |
595 | * | 702 | * |
@@ -622,8 +729,14 @@ void dput(struct dentry *dentry) | |||
622 | return; | 729 | return; |
623 | 730 | ||
624 | repeat: | 731 | repeat: |
625 | if (lockref_put_or_lock(&dentry->d_lockref)) | 732 | rcu_read_lock(); |
733 | if (likely(fast_dput(dentry))) { | ||
734 | rcu_read_unlock(); | ||
626 | return; | 735 | return; |
736 | } | ||
737 | |||
738 | /* Slow case: now with the dentry lock held */ | ||
739 | rcu_read_unlock(); | ||
627 | 740 | ||
628 | /* Unreachable? Get rid of it */ | 741 | /* Unreachable? Get rid of it */ |
629 | if (unlikely(d_unhashed(dentry))) | 742 | if (unlikely(d_unhashed(dentry))) |
@@ -810,7 +923,7 @@ static void shrink_dentry_list(struct list_head *list) | |||
810 | * We found an inuse dentry which was not removed from | 923 | * We found an inuse dentry which was not removed from |
811 | * the LRU because of laziness during lookup. Do not free it. | 924 | * the LRU because of laziness during lookup. Do not free it. |
812 | */ | 925 | */ |
813 | if ((int)dentry->d_lockref.count > 0) { | 926 | if (dentry->d_lockref.count > 0) { |
814 | spin_unlock(&dentry->d_lock); | 927 | spin_unlock(&dentry->d_lock); |
815 | if (parent) | 928 | if (parent) |
816 | spin_unlock(&parent->d_lock); | 929 | spin_unlock(&parent->d_lock); |
@@ -869,8 +982,8 @@ static void shrink_dentry_list(struct list_head *list) | |||
869 | } | 982 | } |
870 | } | 983 | } |
871 | 984 | ||
872 | static enum lru_status | 985 | static enum lru_status dentry_lru_isolate(struct list_head *item, |
873 | dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) | 986 | struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) |
874 | { | 987 | { |
875 | struct list_head *freeable = arg; | 988 | struct list_head *freeable = arg; |
876 | struct dentry *dentry = container_of(item, struct dentry, d_lru); | 989 | struct dentry *dentry = container_of(item, struct dentry, d_lru); |
@@ -890,7 +1003,7 @@ dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) | |||
890 | * another pass through the LRU. | 1003 | * another pass through the LRU. |
891 | */ | 1004 | */ |
892 | if (dentry->d_lockref.count) { | 1005 | if (dentry->d_lockref.count) { |
893 | d_lru_isolate(dentry); | 1006 | d_lru_isolate(lru, dentry); |
894 | spin_unlock(&dentry->d_lock); | 1007 | spin_unlock(&dentry->d_lock); |
895 | return LRU_REMOVED; | 1008 | return LRU_REMOVED; |
896 | } | 1009 | } |
@@ -921,7 +1034,7 @@ dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) | |||
921 | return LRU_ROTATE; | 1034 | return LRU_ROTATE; |
922 | } | 1035 | } |
923 | 1036 | ||
924 | d_lru_shrink_move(dentry, freeable); | 1037 | d_lru_shrink_move(lru, dentry, freeable); |
925 | spin_unlock(&dentry->d_lock); | 1038 | spin_unlock(&dentry->d_lock); |
926 | 1039 | ||
927 | return LRU_REMOVED; | 1040 | return LRU_REMOVED; |
@@ -930,30 +1043,28 @@ dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) | |||
930 | /** | 1043 | /** |
931 | * prune_dcache_sb - shrink the dcache | 1044 | * prune_dcache_sb - shrink the dcache |
932 | * @sb: superblock | 1045 | * @sb: superblock |
933 | * @nr_to_scan : number of entries to try to free | 1046 | * @sc: shrink control, passed to list_lru_shrink_walk() |
934 | * @nid: which node to scan for freeable entities | ||
935 | * | 1047 | * |
936 | * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is | 1048 | * Attempt to shrink the superblock dcache LRU by @sc->nr_to_scan entries. This |
937 | * done when we need more memory an called from the superblock shrinker | 1049 | * is done when we need more memory and called from the superblock shrinker |
938 | * function. | 1050 | * function. |
939 | * | 1051 | * |
940 | * This function may fail to free any resources if all the dentries are in | 1052 | * This function may fail to free any resources if all the dentries are in |
941 | * use. | 1053 | * use. |
942 | */ | 1054 | */ |
943 | long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, | 1055 | long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc) |
944 | int nid) | ||
945 | { | 1056 | { |
946 | LIST_HEAD(dispose); | 1057 | LIST_HEAD(dispose); |
947 | long freed; | 1058 | long freed; |
948 | 1059 | ||
949 | freed = list_lru_walk_node(&sb->s_dentry_lru, nid, dentry_lru_isolate, | 1060 | freed = list_lru_shrink_walk(&sb->s_dentry_lru, sc, |
950 | &dispose, &nr_to_scan); | 1061 | dentry_lru_isolate, &dispose); |
951 | shrink_dentry_list(&dispose); | 1062 | shrink_dentry_list(&dispose); |
952 | return freed; | 1063 | return freed; |
953 | } | 1064 | } |
954 | 1065 | ||
955 | static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, | 1066 | static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, |
956 | spinlock_t *lru_lock, void *arg) | 1067 | struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) |
957 | { | 1068 | { |
958 | struct list_head *freeable = arg; | 1069 | struct list_head *freeable = arg; |
959 | struct dentry *dentry = container_of(item, struct dentry, d_lru); | 1070 | struct dentry *dentry = container_of(item, struct dentry, d_lru); |
@@ -966,7 +1077,7 @@ static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, | |||
966 | if (!spin_trylock(&dentry->d_lock)) | 1077 | if (!spin_trylock(&dentry->d_lock)) |
967 | return LRU_SKIP; | 1078 | return LRU_SKIP; |
968 | 1079 | ||
969 | d_lru_shrink_move(dentry, freeable); | 1080 | d_lru_shrink_move(lru, dentry, freeable); |
970 | spin_unlock(&dentry->d_lock); | 1081 | spin_unlock(&dentry->d_lock); |
971 | 1082 | ||
972 | return LRU_REMOVED; | 1083 | return LRU_REMOVED; |
@@ -1430,6 +1541,9 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) | |||
1430 | } | 1541 | } |
1431 | atomic_set(&p->u.count, 1); | 1542 | atomic_set(&p->u.count, 1); |
1432 | dname = p->name; | 1543 | dname = p->name; |
1544 | if (IS_ENABLED(CONFIG_DCACHE_WORD_ACCESS)) | ||
1545 | kasan_unpoison_shadow(dname, | ||
1546 | round_up(name->len + 1, sizeof(unsigned long))); | ||
1433 | } else { | 1547 | } else { |
1434 | dname = dentry->d_iname; | 1548 | dname = dentry->d_iname; |
1435 | } | 1549 | } |
@@ -2187,37 +2301,6 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) | |||
2187 | } | 2301 | } |
2188 | EXPORT_SYMBOL(d_hash_and_lookup); | 2302 | EXPORT_SYMBOL(d_hash_and_lookup); |
2189 | 2303 | ||
2190 | /** | ||
2191 | * d_validate - verify dentry provided from insecure source (deprecated) | ||
2192 | * @dentry: The dentry alleged to be valid child of @dparent | ||
2193 | * @dparent: The parent dentry (known to be valid) | ||
2194 | * | ||
2195 | * An insecure source has sent us a dentry, here we verify it and dget() it. | ||
2196 | * This is used by ncpfs in its readdir implementation. | ||
2197 | * Zero is returned in the dentry is invalid. | ||
2198 | * | ||
2199 | * This function is slow for big directories, and deprecated, do not use it. | ||
2200 | */ | ||
2201 | int d_validate(struct dentry *dentry, struct dentry *dparent) | ||
2202 | { | ||
2203 | struct dentry *child; | ||
2204 | |||
2205 | spin_lock(&dparent->d_lock); | ||
2206 | list_for_each_entry(child, &dparent->d_subdirs, d_child) { | ||
2207 | if (dentry == child) { | ||
2208 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); | ||
2209 | __dget_dlock(dentry); | ||
2210 | spin_unlock(&dentry->d_lock); | ||
2211 | spin_unlock(&dparent->d_lock); | ||
2212 | return 1; | ||
2213 | } | ||
2214 | } | ||
2215 | spin_unlock(&dparent->d_lock); | ||
2216 | |||
2217 | return 0; | ||
2218 | } | ||
2219 | EXPORT_SYMBOL(d_validate); | ||
2220 | |||
2221 | /* | 2304 | /* |
2222 | * When a file is deleted, we have two options: | 2305 | * When a file is deleted, we have two options: |
2223 | * - turn this dentry into a negative dentry | 2306 | * - turn this dentry into a negative dentry |
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 05f2960ed7c3..45b18a5e225c 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
@@ -34,93 +34,16 @@ static struct vfsmount *debugfs_mount; | |||
34 | static int debugfs_mount_count; | 34 | static int debugfs_mount_count; |
35 | static bool debugfs_registered; | 35 | static bool debugfs_registered; |
36 | 36 | ||
37 | static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev_t dev, | 37 | static struct inode *debugfs_get_inode(struct super_block *sb) |
38 | void *data, const struct file_operations *fops) | ||
39 | |||
40 | { | 38 | { |
41 | struct inode *inode = new_inode(sb); | 39 | struct inode *inode = new_inode(sb); |
42 | |||
43 | if (inode) { | 40 | if (inode) { |
44 | inode->i_ino = get_next_ino(); | 41 | inode->i_ino = get_next_ino(); |
45 | inode->i_mode = mode; | ||
46 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 42 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
47 | switch (mode & S_IFMT) { | ||
48 | default: | ||
49 | init_special_inode(inode, mode, dev); | ||
50 | break; | ||
51 | case S_IFREG: | ||
52 | inode->i_fop = fops ? fops : &debugfs_file_operations; | ||
53 | inode->i_private = data; | ||
54 | break; | ||
55 | case S_IFLNK: | ||
56 | inode->i_op = &debugfs_link_operations; | ||
57 | inode->i_private = data; | ||
58 | break; | ||
59 | case S_IFDIR: | ||
60 | inode->i_op = &simple_dir_inode_operations; | ||
61 | inode->i_fop = &simple_dir_operations; | ||
62 | |||
63 | /* directory inodes start off with i_nlink == 2 | ||
64 | * (for "." entry) */ | ||
65 | inc_nlink(inode); | ||
66 | break; | ||
67 | } | ||
68 | } | 43 | } |
69 | return inode; | 44 | return inode; |
70 | } | 45 | } |
71 | 46 | ||
72 | /* SMP-safe */ | ||
73 | static int debugfs_mknod(struct inode *dir, struct dentry *dentry, | ||
74 | umode_t mode, dev_t dev, void *data, | ||
75 | const struct file_operations *fops) | ||
76 | { | ||
77 | struct inode *inode; | ||
78 | int error = -EPERM; | ||
79 | |||
80 | if (dentry->d_inode) | ||
81 | return -EEXIST; | ||
82 | |||
83 | inode = debugfs_get_inode(dir->i_sb, mode, dev, data, fops); | ||
84 | if (inode) { | ||
85 | d_instantiate(dentry, inode); | ||
86 | dget(dentry); | ||
87 | error = 0; | ||
88 | } | ||
89 | return error; | ||
90 | } | ||
91 | |||
92 | static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
93 | { | ||
94 | int res; | ||
95 | |||
96 | mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; | ||
97 | res = debugfs_mknod(dir, dentry, mode, 0, NULL, NULL); | ||
98 | if (!res) { | ||
99 | inc_nlink(dir); | ||
100 | fsnotify_mkdir(dir, dentry); | ||
101 | } | ||
102 | return res; | ||
103 | } | ||
104 | |||
105 | static int debugfs_link(struct inode *dir, struct dentry *dentry, umode_t mode, | ||
106 | void *data) | ||
107 | { | ||
108 | mode = (mode & S_IALLUGO) | S_IFLNK; | ||
109 | return debugfs_mknod(dir, dentry, mode, 0, data, NULL); | ||
110 | } | ||
111 | |||
112 | static int debugfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, | ||
113 | void *data, const struct file_operations *fops) | ||
114 | { | ||
115 | int res; | ||
116 | |||
117 | mode = (mode & S_IALLUGO) | S_IFREG; | ||
118 | res = debugfs_mknod(dir, dentry, mode, 0, data, fops); | ||
119 | if (!res) | ||
120 | fsnotify_create(dir, dentry); | ||
121 | return res; | ||
122 | } | ||
123 | |||
124 | static inline int debugfs_positive(struct dentry *dentry) | 47 | static inline int debugfs_positive(struct dentry *dentry) |
125 | { | 48 | { |
126 | return dentry->d_inode && !d_unhashed(dentry); | 49 | return dentry->d_inode && !d_unhashed(dentry); |
@@ -252,6 +175,18 @@ static const struct super_operations debugfs_super_operations = { | |||
252 | .show_options = debugfs_show_options, | 175 | .show_options = debugfs_show_options, |
253 | }; | 176 | }; |
254 | 177 | ||
178 | static struct vfsmount *debugfs_automount(struct path *path) | ||
179 | { | ||
180 | struct vfsmount *(*f)(void *); | ||
181 | f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata; | ||
182 | return f(path->dentry->d_inode->i_private); | ||
183 | } | ||
184 | |||
185 | static const struct dentry_operations debugfs_dops = { | ||
186 | .d_delete = always_delete_dentry, | ||
187 | .d_automount = debugfs_automount, | ||
188 | }; | ||
189 | |||
255 | static int debug_fill_super(struct super_block *sb, void *data, int silent) | 190 | static int debug_fill_super(struct super_block *sb, void *data, int silent) |
256 | { | 191 | { |
257 | static struct tree_descr debug_files[] = {{""}}; | 192 | static struct tree_descr debug_files[] = {{""}}; |
@@ -276,6 +211,7 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent) | |||
276 | goto fail; | 211 | goto fail; |
277 | 212 | ||
278 | sb->s_op = &debugfs_super_operations; | 213 | sb->s_op = &debugfs_super_operations; |
214 | sb->s_d_op = &debugfs_dops; | ||
279 | 215 | ||
280 | debugfs_apply_options(sb); | 216 | debugfs_apply_options(sb); |
281 | 217 | ||
@@ -302,11 +238,9 @@ static struct file_system_type debug_fs_type = { | |||
302 | }; | 238 | }; |
303 | MODULE_ALIAS_FS("debugfs"); | 239 | MODULE_ALIAS_FS("debugfs"); |
304 | 240 | ||
305 | static struct dentry *__create_file(const char *name, umode_t mode, | 241 | static struct dentry *start_creating(const char *name, struct dentry *parent) |
306 | struct dentry *parent, void *data, | ||
307 | const struct file_operations *fops) | ||
308 | { | 242 | { |
309 | struct dentry *dentry = NULL; | 243 | struct dentry *dentry; |
310 | int error; | 244 | int error; |
311 | 245 | ||
312 | pr_debug("debugfs: creating file '%s'\n",name); | 246 | pr_debug("debugfs: creating file '%s'\n",name); |
@@ -314,7 +248,7 @@ static struct dentry *__create_file(const char *name, umode_t mode, | |||
314 | error = simple_pin_fs(&debug_fs_type, &debugfs_mount, | 248 | error = simple_pin_fs(&debug_fs_type, &debugfs_mount, |
315 | &debugfs_mount_count); | 249 | &debugfs_mount_count); |
316 | if (error) | 250 | if (error) |
317 | goto exit; | 251 | return ERR_PTR(error); |
318 | 252 | ||
319 | /* If the parent is not specified, we create it in the root. | 253 | /* If the parent is not specified, we create it in the root. |
320 | * We need the root dentry to do this, which is in the super | 254 | * We need the root dentry to do this, which is in the super |
@@ -326,31 +260,26 @@ static struct dentry *__create_file(const char *name, umode_t mode, | |||
326 | 260 | ||
327 | mutex_lock(&parent->d_inode->i_mutex); | 261 | mutex_lock(&parent->d_inode->i_mutex); |
328 | dentry = lookup_one_len(name, parent, strlen(name)); | 262 | dentry = lookup_one_len(name, parent, strlen(name)); |
329 | if (!IS_ERR(dentry)) { | 263 | if (!IS_ERR(dentry) && dentry->d_inode) { |
330 | switch (mode & S_IFMT) { | ||
331 | case S_IFDIR: | ||
332 | error = debugfs_mkdir(parent->d_inode, dentry, mode); | ||
333 | |||
334 | break; | ||
335 | case S_IFLNK: | ||
336 | error = debugfs_link(parent->d_inode, dentry, mode, | ||
337 | data); | ||
338 | break; | ||
339 | default: | ||
340 | error = debugfs_create(parent->d_inode, dentry, mode, | ||
341 | data, fops); | ||
342 | break; | ||
343 | } | ||
344 | dput(dentry); | 264 | dput(dentry); |
345 | } else | 265 | dentry = ERR_PTR(-EEXIST); |
346 | error = PTR_ERR(dentry); | ||
347 | mutex_unlock(&parent->d_inode->i_mutex); | ||
348 | |||
349 | if (error) { | ||
350 | dentry = NULL; | ||
351 | simple_release_fs(&debugfs_mount, &debugfs_mount_count); | ||
352 | } | 266 | } |
353 | exit: | 267 | if (IS_ERR(dentry)) |
268 | mutex_unlock(&parent->d_inode->i_mutex); | ||
269 | return dentry; | ||
270 | } | ||
271 | |||
272 | static struct dentry *failed_creating(struct dentry *dentry) | ||
273 | { | ||
274 | mutex_unlock(&dentry->d_parent->d_inode->i_mutex); | ||
275 | dput(dentry); | ||
276 | simple_release_fs(&debugfs_mount, &debugfs_mount_count); | ||
277 | return NULL; | ||
278 | } | ||
279 | |||
280 | static struct dentry *end_creating(struct dentry *dentry) | ||
281 | { | ||
282 | mutex_unlock(&dentry->d_parent->d_inode->i_mutex); | ||
354 | return dentry; | 283 | return dentry; |
355 | } | 284 | } |
356 | 285 | ||
@@ -384,19 +313,71 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode, | |||
384 | struct dentry *parent, void *data, | 313 | struct dentry *parent, void *data, |
385 | const struct file_operations *fops) | 314 | const struct file_operations *fops) |
386 | { | 315 | { |
387 | switch (mode & S_IFMT) { | 316 | struct dentry *dentry; |
388 | case S_IFREG: | 317 | struct inode *inode; |
389 | case 0: | 318 | |
390 | break; | 319 | if (!(mode & S_IFMT)) |
391 | default: | 320 | mode |= S_IFREG; |
392 | BUG(); | 321 | BUG_ON(!S_ISREG(mode)); |
393 | } | 322 | dentry = start_creating(name, parent); |
323 | |||
324 | if (IS_ERR(dentry)) | ||
325 | return NULL; | ||
394 | 326 | ||
395 | return __create_file(name, mode, parent, data, fops); | 327 | inode = debugfs_get_inode(dentry->d_sb); |
328 | if (unlikely(!inode)) | ||
329 | return failed_creating(dentry); | ||
330 | |||
331 | inode->i_mode = mode; | ||
332 | inode->i_fop = fops ? fops : &debugfs_file_operations; | ||
333 | inode->i_private = data; | ||
334 | d_instantiate(dentry, inode); | ||
335 | fsnotify_create(dentry->d_parent->d_inode, dentry); | ||
336 | return end_creating(dentry); | ||
396 | } | 337 | } |
397 | EXPORT_SYMBOL_GPL(debugfs_create_file); | 338 | EXPORT_SYMBOL_GPL(debugfs_create_file); |
398 | 339 | ||
399 | /** | 340 | /** |
341 | * debugfs_create_file_size - create a file in the debugfs filesystem | ||
342 | * @name: a pointer to a string containing the name of the file to create. | ||
343 | * @mode: the permission that the file should have. | ||
344 | * @parent: a pointer to the parent dentry for this file. This should be a | ||
345 | * directory dentry if set. If this parameter is NULL, then the | ||
346 | * file will be created in the root of the debugfs filesystem. | ||
347 | * @data: a pointer to something that the caller will want to get to later | ||
348 | * on. The inode.i_private pointer will point to this value on | ||
349 | * the open() call. | ||
350 | * @fops: a pointer to a struct file_operations that should be used for | ||
351 | * this file. | ||
352 | * @file_size: initial file size | ||
353 | * | ||
354 | * This is the basic "create a file" function for debugfs. It allows for a | ||
355 | * wide range of flexibility in creating a file, or a directory (if you want | ||
356 | * to create a directory, the debugfs_create_dir() function is | ||
357 | * recommended to be used instead.) | ||
358 | * | ||
359 | * This function will return a pointer to a dentry if it succeeds. This | ||
360 | * pointer must be passed to the debugfs_remove() function when the file is | ||
361 | * to be removed (no automatic cleanup happens if your module is unloaded, | ||
362 | * you are responsible here.) If an error occurs, %NULL will be returned. | ||
363 | * | ||
364 | * If debugfs is not enabled in the kernel, the value -%ENODEV will be | ||
365 | * returned. | ||
366 | */ | ||
367 | struct dentry *debugfs_create_file_size(const char *name, umode_t mode, | ||
368 | struct dentry *parent, void *data, | ||
369 | const struct file_operations *fops, | ||
370 | loff_t file_size) | ||
371 | { | ||
372 | struct dentry *de = debugfs_create_file(name, mode, parent, data, fops); | ||
373 | |||
374 | if (de) | ||
375 | de->d_inode->i_size = file_size; | ||
376 | return de; | ||
377 | } | ||
378 | EXPORT_SYMBOL_GPL(debugfs_create_file_size); | ||
379 | |||
380 | /** | ||
400 | * debugfs_create_dir - create a directory in the debugfs filesystem | 381 | * debugfs_create_dir - create a directory in the debugfs filesystem |
401 | * @name: a pointer to a string containing the name of the directory to | 382 | * @name: a pointer to a string containing the name of the directory to |
402 | * create. | 383 | * create. |
@@ -416,12 +397,65 @@ EXPORT_SYMBOL_GPL(debugfs_create_file); | |||
416 | */ | 397 | */ |
417 | struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) | 398 | struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) |
418 | { | 399 | { |
419 | return __create_file(name, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, | 400 | struct dentry *dentry = start_creating(name, parent); |
420 | parent, NULL, NULL); | 401 | struct inode *inode; |
402 | |||
403 | if (IS_ERR(dentry)) | ||
404 | return NULL; | ||
405 | |||
406 | inode = debugfs_get_inode(dentry->d_sb); | ||
407 | if (unlikely(!inode)) | ||
408 | return failed_creating(dentry); | ||
409 | |||
410 | inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; | ||
411 | inode->i_op = &simple_dir_inode_operations; | ||
412 | inode->i_fop = &simple_dir_operations; | ||
413 | |||
414 | /* directory inodes start off with i_nlink == 2 (for "." entry) */ | ||
415 | inc_nlink(inode); | ||
416 | d_instantiate(dentry, inode); | ||
417 | inc_nlink(dentry->d_parent->d_inode); | ||
418 | fsnotify_mkdir(dentry->d_parent->d_inode, dentry); | ||
419 | return end_creating(dentry); | ||
421 | } | 420 | } |
422 | EXPORT_SYMBOL_GPL(debugfs_create_dir); | 421 | EXPORT_SYMBOL_GPL(debugfs_create_dir); |
423 | 422 | ||
424 | /** | 423 | /** |
424 | * debugfs_create_automount - create automount point in the debugfs filesystem | ||
425 | * @name: a pointer to a string containing the name of the file to create. | ||
426 | * @parent: a pointer to the parent dentry for this file. This should be a | ||
427 | * directory dentry if set. If this parameter is NULL, then the | ||
428 | * file will be created in the root of the debugfs filesystem. | ||
429 | * @f: function to be called when pathname resolution steps on that one. | ||
430 | * @data: opaque argument to pass to f(). | ||
431 | * | ||
432 | * @f should return what ->d_automount() would. | ||
433 | */ | ||
434 | struct dentry *debugfs_create_automount(const char *name, | ||
435 | struct dentry *parent, | ||
436 | struct vfsmount *(*f)(void *), | ||
437 | void *data) | ||
438 | { | ||
439 | struct dentry *dentry = start_creating(name, parent); | ||
440 | struct inode *inode; | ||
441 | |||
442 | if (IS_ERR(dentry)) | ||
443 | return NULL; | ||
444 | |||
445 | inode = debugfs_get_inode(dentry->d_sb); | ||
446 | if (unlikely(!inode)) | ||
447 | return failed_creating(dentry); | ||
448 | |||
449 | inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; | ||
450 | inode->i_flags |= S_AUTOMOUNT; | ||
451 | inode->i_private = data; | ||
452 | dentry->d_fsdata = (void *)f; | ||
453 | d_instantiate(dentry, inode); | ||
454 | return end_creating(dentry); | ||
455 | } | ||
456 | EXPORT_SYMBOL(debugfs_create_automount); | ||
457 | |||
458 | /** | ||
425 | * debugfs_create_symlink- create a symbolic link in the debugfs filesystem | 459 | * debugfs_create_symlink- create a symbolic link in the debugfs filesystem |
426 | * @name: a pointer to a string containing the name of the symbolic link to | 460 | * @name: a pointer to a string containing the name of the symbolic link to |
427 | * create. | 461 | * create. |
@@ -447,17 +481,28 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir); | |||
447 | struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, | 481 | struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, |
448 | const char *target) | 482 | const char *target) |
449 | { | 483 | { |
450 | struct dentry *result; | 484 | struct dentry *dentry; |
451 | char *link; | 485 | struct inode *inode; |
452 | 486 | char *link = kstrdup(target, GFP_KERNEL); | |
453 | link = kstrdup(target, GFP_KERNEL); | ||
454 | if (!link) | 487 | if (!link) |
455 | return NULL; | 488 | return NULL; |
456 | 489 | ||
457 | result = __create_file(name, S_IFLNK | S_IRWXUGO, parent, link, NULL); | 490 | dentry = start_creating(name, parent); |
458 | if (!result) | 491 | if (IS_ERR(dentry)) { |
459 | kfree(link); | 492 | kfree(link); |
460 | return result; | 493 | return NULL; |
494 | } | ||
495 | |||
496 | inode = debugfs_get_inode(dentry->d_sb); | ||
497 | if (unlikely(!inode)) { | ||
498 | kfree(link); | ||
499 | return failed_creating(dentry); | ||
500 | } | ||
501 | inode->i_mode = S_IFLNK | S_IRWXUGO; | ||
502 | inode->i_op = &debugfs_link_operations; | ||
503 | inode->i_private = link; | ||
504 | d_instantiate(dentry, inode); | ||
505 | return end_creating(dentry); | ||
461 | } | 506 | } |
462 | EXPORT_SYMBOL_GPL(debugfs_create_symlink); | 507 | EXPORT_SYMBOL_GPL(debugfs_create_symlink); |
463 | 508 | ||
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index e7cfbaf8d0e2..1e6e227134d7 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c | |||
@@ -56,13 +56,8 @@ static int send_data(struct sk_buff *skb) | |||
56 | { | 56 | { |
57 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); | 57 | struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); |
58 | void *data = genlmsg_data(genlhdr); | 58 | void *data = genlmsg_data(genlhdr); |
59 | int rv; | ||
60 | 59 | ||
61 | rv = genlmsg_end(skb, data); | 60 | genlmsg_end(skb, data); |
62 | if (rv < 0) { | ||
63 | nlmsg_free(skb); | ||
64 | return rv; | ||
65 | } | ||
66 | 61 | ||
67 | return genlmsg_unicast(&init_net, skb, listener_nlportid); | 62 | return genlmsg_unicast(&init_net, skb, listener_nlportid); |
68 | } | 63 | } |
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 2bc2c87f35e7..5718cb9f7273 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
@@ -37,20 +37,6 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) | |||
37 | iput(toput_inode); | 37 | iput(toput_inode); |
38 | } | 38 | } |
39 | 39 | ||
40 | static void drop_slab(void) | ||
41 | { | ||
42 | int nr_objects; | ||
43 | |||
44 | do { | ||
45 | int nid; | ||
46 | |||
47 | nr_objects = 0; | ||
48 | for_each_online_node(nid) | ||
49 | nr_objects += shrink_node_slabs(GFP_KERNEL, nid, | ||
50 | 1000, 1000); | ||
51 | } while (nr_objects > 10); | ||
52 | } | ||
53 | |||
54 | int drop_caches_sysctl_handler(struct ctl_table *table, int write, | 40 | int drop_caches_sysctl_handler(struct ctl_table *table, int write, |
55 | void __user *buffer, size_t *length, loff_t *ppos) | 41 | void __user *buffer, size_t *length, loff_t *ppos) |
56 | { | 42 | { |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 1686dc2da9fd..34b36a504059 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -67,7 +67,6 @@ static int ecryptfs_inode_set(struct inode *inode, void *opaque) | |||
67 | inode->i_ino = lower_inode->i_ino; | 67 | inode->i_ino = lower_inode->i_ino; |
68 | inode->i_version++; | 68 | inode->i_version++; |
69 | inode->i_mapping->a_ops = &ecryptfs_aops; | 69 | inode->i_mapping->a_ops = &ecryptfs_aops; |
70 | inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; | ||
71 | 70 | ||
72 | if (S_ISLNK(inode->i_mode)) | 71 | if (S_ISLNK(inode->i_mode)) |
73 | inode->i_op = &ecryptfs_symlink_iops; | 72 | inode->i_op = &ecryptfs_symlink_iops; |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index d9eb84bda559..1895d60f4122 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -520,7 +520,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags | |||
520 | goto out; | 520 | goto out; |
521 | } | 521 | } |
522 | 522 | ||
523 | rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); | 523 | rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs"); |
524 | if (rc) | 524 | if (rc) |
525 | goto out1; | 525 | goto out1; |
526 | 526 | ||
diff --git a/fs/efivarfs/Kconfig b/fs/efivarfs/Kconfig index 367bbb10c543..c2499ef174a2 100644 --- a/fs/efivarfs/Kconfig +++ b/fs/efivarfs/Kconfig | |||
@@ -1,6 +1,7 @@ | |||
1 | config EFIVAR_FS | 1 | config EFIVAR_FS |
2 | tristate "EFI Variable filesystem" | 2 | tristate "EFI Variable filesystem" |
3 | depends on EFI | 3 | depends on EFI |
4 | default m | ||
4 | help | 5 | help |
5 | efivarfs is a replacement filesystem for the old EFI | 6 | efivarfs is a replacement filesystem for the old EFI |
6 | variable support via sysfs, as it doesn't suffer from the | 7 | variable support via sysfs, as it doesn't suffer from the |
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 6dad1176ec52..ddbce42548c9 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c | |||
@@ -140,7 +140,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, | |||
140 | 140 | ||
141 | name[len] = '-'; | 141 | name[len] = '-'; |
142 | 142 | ||
143 | efi_guid_unparse(&entry->var.VendorGuid, name + len + 1); | 143 | efi_guid_to_str(&entry->var.VendorGuid, name + len + 1); |
144 | 144 | ||
145 | name[len + EFI_VARIABLE_GUID_LEN+1] = '\0'; | 145 | name[len + EFI_VARIABLE_GUID_LEN+1] = '\0'; |
146 | 146 | ||
diff --git a/fs/eventfd.c b/fs/eventfd.c index 4b0a226024fa..8d0c0df01854 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c | |||
@@ -118,18 +118,18 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait) | |||
118 | { | 118 | { |
119 | struct eventfd_ctx *ctx = file->private_data; | 119 | struct eventfd_ctx *ctx = file->private_data; |
120 | unsigned int events = 0; | 120 | unsigned int events = 0; |
121 | unsigned long flags; | 121 | u64 count; |
122 | 122 | ||
123 | poll_wait(file, &ctx->wqh, wait); | 123 | poll_wait(file, &ctx->wqh, wait); |
124 | smp_rmb(); | ||
125 | count = ctx->count; | ||
124 | 126 | ||
125 | spin_lock_irqsave(&ctx->wqh.lock, flags); | 127 | if (count > 0) |
126 | if (ctx->count > 0) | ||
127 | events |= POLLIN; | 128 | events |= POLLIN; |
128 | if (ctx->count == ULLONG_MAX) | 129 | if (count == ULLONG_MAX) |
129 | events |= POLLERR; | 130 | events |= POLLERR; |
130 | if (ULLONG_MAX - 1 > ctx->count) | 131 | if (ULLONG_MAX - 1 > count) |
131 | events |= POLLOUT; | 132 | events |= POLLOUT; |
132 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); | ||
133 | 133 | ||
134 | return events; | 134 | return events; |
135 | } | 135 | } |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index d77f94491352..1e009cad8d5c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -1639,9 +1639,9 @@ fetch_events: | |||
1639 | 1639 | ||
1640 | spin_lock_irqsave(&ep->lock, flags); | 1640 | spin_lock_irqsave(&ep->lock, flags); |
1641 | } | 1641 | } |
1642 | __remove_wait_queue(&ep->wq, &wait); | ||
1643 | 1642 | ||
1644 | set_current_state(TASK_RUNNING); | 1643 | __remove_wait_queue(&ep->wq, &wait); |
1644 | __set_current_state(TASK_RUNNING); | ||
1645 | } | 1645 | } |
1646 | check_events: | 1646 | check_events: |
1647 | /* Is it worth to try to dig for events ? */ | 1647 | /* Is it worth to try to dig for events ? */ |
@@ -794,8 +794,14 @@ exit: | |||
794 | 794 | ||
795 | struct file *open_exec(const char *name) | 795 | struct file *open_exec(const char *name) |
796 | { | 796 | { |
797 | struct filename tmp = { .name = name }; | 797 | struct filename *filename = getname_kernel(name); |
798 | return do_open_execat(AT_FDCWD, &tmp, 0); | 798 | struct file *f = ERR_CAST(filename); |
799 | |||
800 | if (!IS_ERR(filename)) { | ||
801 | f = do_open_execat(AT_FDCWD, filename, 0); | ||
802 | putname(filename); | ||
803 | } | ||
804 | return f; | ||
799 | } | 805 | } |
800 | EXPORT_SYMBOL(open_exec); | 806 | EXPORT_SYMBOL(open_exec); |
801 | 807 | ||
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index f1d3d4eb8c4f..a198e94813fe 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -985,7 +985,6 @@ const struct address_space_operations exofs_aops = { | |||
985 | .direct_IO = exofs_direct_IO, | 985 | .direct_IO = exofs_direct_IO, |
986 | 986 | ||
987 | /* With these NULL has special meaning or default is not exported */ | 987 | /* With these NULL has special meaning or default is not exported */ |
988 | .get_xip_mem = NULL, | ||
989 | .migratepage = NULL, | 988 | .migratepage = NULL, |
990 | .launder_page = NULL, | 989 | .launder_page = NULL, |
991 | .is_partially_uptodate = NULL, | 990 | .is_partially_uptodate = NULL, |
@@ -1214,7 +1213,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
1214 | memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); | 1213 | memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); |
1215 | } | 1214 | } |
1216 | 1215 | ||
1217 | inode->i_mapping->backing_dev_info = sb->s_bdi; | ||
1218 | if (S_ISREG(inode->i_mode)) { | 1216 | if (S_ISREG(inode->i_mode)) { |
1219 | inode->i_op = &exofs_file_inode_operations; | 1217 | inode->i_op = &exofs_file_inode_operations; |
1220 | inode->i_fop = &exofs_file_operations; | 1218 | inode->i_fop = &exofs_file_operations; |
@@ -1314,7 +1312,6 @@ struct inode *exofs_new_inode(struct inode *dir, umode_t mode) | |||
1314 | 1312 | ||
1315 | set_obj_2bcreated(oi); | 1313 | set_obj_2bcreated(oi); |
1316 | 1314 | ||
1317 | inode->i_mapping->backing_dev_info = sb->s_bdi; | ||
1318 | inode_init_owner(inode, dir, mode); | 1315 | inode_init_owner(inode, dir, mode); |
1319 | inode->i_ino = sbi->s_nextid++; | 1316 | inode->i_ino = sbi->s_nextid++; |
1320 | inode->i_blkbits = EXOFS_BLKSHIFT; | 1317 | inode->i_blkbits = EXOFS_BLKSHIFT; |
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 95965503afcb..fcc2e565f540 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -836,7 +836,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
836 | goto free_sbi; | 836 | goto free_sbi; |
837 | } | 837 | } |
838 | 838 | ||
839 | ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); | 839 | ret = bdi_setup_and_register(&sbi->bdi, "exofs"); |
840 | if (ret) { | 840 | if (ret) { |
841 | EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); | 841 | EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); |
842 | dput(sb->s_root); | 842 | dput(sb->s_root); |
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig index 14a6780fd034..c634874e12d9 100644 --- a/fs/ext2/Kconfig +++ b/fs/ext2/Kconfig | |||
@@ -42,14 +42,3 @@ config EXT2_FS_SECURITY | |||
42 | 42 | ||
43 | If you are not using a security module that requires using | 43 | If you are not using a security module that requires using |
44 | extended attributes for file security labels, say N. | 44 | extended attributes for file security labels, say N. |
45 | |||
46 | config EXT2_FS_XIP | ||
47 | bool "Ext2 execute in place support" | ||
48 | depends on EXT2_FS && MMU | ||
49 | help | ||
50 | Execute in place can be used on memory-backed block devices. If you | ||
51 | enable this option, you can select to mount block devices which are | ||
52 | capable of this feature without using the page cache. | ||
53 | |||
54 | If you do not use a block device that is capable of using this, | ||
55 | or if unsure, say N. | ||
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile index f42af45cfd88..445b0e996a12 100644 --- a/fs/ext2/Makefile +++ b/fs/ext2/Makefile | |||
@@ -10,4 +10,3 @@ ext2-y := balloc.o dir.o file.o ialloc.o inode.o \ | |||
10 | ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o | 10 | ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o |
11 | ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o | 11 | ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o |
12 | ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o | 12 | ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o |
13 | ext2-$(CONFIG_EXT2_FS_XIP) += xip.o | ||
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index e4279ead4a05..678f9ab08c48 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h | |||
@@ -380,10 +380,15 @@ struct ext2_inode { | |||
380 | #define EXT2_MOUNT_NO_UID32 0x000200 /* Disable 32-bit UIDs */ | 380 | #define EXT2_MOUNT_NO_UID32 0x000200 /* Disable 32-bit UIDs */ |
381 | #define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */ | 381 | #define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */ |
382 | #define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */ | 382 | #define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */ |
383 | #define EXT2_MOUNT_XIP 0x010000 /* Execute in place */ | 383 | #define EXT2_MOUNT_XIP 0x010000 /* Obsolete, use DAX */ |
384 | #define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ | 384 | #define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ |
385 | #define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ | 385 | #define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ |
386 | #define EXT2_MOUNT_RESERVATION 0x080000 /* Preallocation */ | 386 | #define EXT2_MOUNT_RESERVATION 0x080000 /* Preallocation */ |
387 | #ifdef CONFIG_FS_DAX | ||
388 | #define EXT2_MOUNT_DAX 0x100000 /* Direct Access */ | ||
389 | #else | ||
390 | #define EXT2_MOUNT_DAX 0 | ||
391 | #endif | ||
387 | 392 | ||
388 | 393 | ||
389 | #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt | 394 | #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt |
@@ -788,11 +793,10 @@ extern int ext2_fsync(struct file *file, loff_t start, loff_t end, | |||
788 | int datasync); | 793 | int datasync); |
789 | extern const struct inode_operations ext2_file_inode_operations; | 794 | extern const struct inode_operations ext2_file_inode_operations; |
790 | extern const struct file_operations ext2_file_operations; | 795 | extern const struct file_operations ext2_file_operations; |
791 | extern const struct file_operations ext2_xip_file_operations; | 796 | extern const struct file_operations ext2_dax_file_operations; |
792 | 797 | ||
793 | /* inode.c */ | 798 | /* inode.c */ |
794 | extern const struct address_space_operations ext2_aops; | 799 | extern const struct address_space_operations ext2_aops; |
795 | extern const struct address_space_operations ext2_aops_xip; | ||
796 | extern const struct address_space_operations ext2_nobh_aops; | 800 | extern const struct address_space_operations ext2_nobh_aops; |
797 | 801 | ||
798 | /* namei.c */ | 802 | /* namei.c */ |
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 7c87b22a7228..e31701713516 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
@@ -25,6 +25,36 @@ | |||
25 | #include "xattr.h" | 25 | #include "xattr.h" |
26 | #include "acl.h" | 26 | #include "acl.h" |
27 | 27 | ||
28 | #ifdef CONFIG_FS_DAX | ||
29 | static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
30 | { | ||
31 | return dax_fault(vma, vmf, ext2_get_block); | ||
32 | } | ||
33 | |||
34 | static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
35 | { | ||
36 | return dax_mkwrite(vma, vmf, ext2_get_block); | ||
37 | } | ||
38 | |||
39 | static const struct vm_operations_struct ext2_dax_vm_ops = { | ||
40 | .fault = ext2_dax_fault, | ||
41 | .page_mkwrite = ext2_dax_mkwrite, | ||
42 | }; | ||
43 | |||
44 | static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma) | ||
45 | { | ||
46 | if (!IS_DAX(file_inode(file))) | ||
47 | return generic_file_mmap(file, vma); | ||
48 | |||
49 | file_accessed(file); | ||
50 | vma->vm_ops = &ext2_dax_vm_ops; | ||
51 | vma->vm_flags |= VM_MIXEDMAP; | ||
52 | return 0; | ||
53 | } | ||
54 | #else | ||
55 | #define ext2_file_mmap generic_file_mmap | ||
56 | #endif | ||
57 | |||
28 | /* | 58 | /* |
29 | * Called when filp is released. This happens when all file descriptors | 59 | * Called when filp is released. This happens when all file descriptors |
30 | * for a single struct file are closed. Note that different open() calls | 60 | * for a single struct file are closed. Note that different open() calls |
@@ -70,7 +100,7 @@ const struct file_operations ext2_file_operations = { | |||
70 | #ifdef CONFIG_COMPAT | 100 | #ifdef CONFIG_COMPAT |
71 | .compat_ioctl = ext2_compat_ioctl, | 101 | .compat_ioctl = ext2_compat_ioctl, |
72 | #endif | 102 | #endif |
73 | .mmap = generic_file_mmap, | 103 | .mmap = ext2_file_mmap, |
74 | .open = dquot_file_open, | 104 | .open = dquot_file_open, |
75 | .release = ext2_release_file, | 105 | .release = ext2_release_file, |
76 | .fsync = ext2_fsync, | 106 | .fsync = ext2_fsync, |
@@ -78,16 +108,18 @@ const struct file_operations ext2_file_operations = { | |||
78 | .splice_write = iter_file_splice_write, | 108 | .splice_write = iter_file_splice_write, |
79 | }; | 109 | }; |
80 | 110 | ||
81 | #ifdef CONFIG_EXT2_FS_XIP | 111 | #ifdef CONFIG_FS_DAX |
82 | const struct file_operations ext2_xip_file_operations = { | 112 | const struct file_operations ext2_dax_file_operations = { |
83 | .llseek = generic_file_llseek, | 113 | .llseek = generic_file_llseek, |
84 | .read = xip_file_read, | 114 | .read = new_sync_read, |
85 | .write = xip_file_write, | 115 | .write = new_sync_write, |
116 | .read_iter = generic_file_read_iter, | ||
117 | .write_iter = generic_file_write_iter, | ||
86 | .unlocked_ioctl = ext2_ioctl, | 118 | .unlocked_ioctl = ext2_ioctl, |
87 | #ifdef CONFIG_COMPAT | 119 | #ifdef CONFIG_COMPAT |
88 | .compat_ioctl = ext2_compat_ioctl, | 120 | .compat_ioctl = ext2_compat_ioctl, |
89 | #endif | 121 | #endif |
90 | .mmap = xip_file_mmap, | 122 | .mmap = ext2_file_mmap, |
91 | .open = dquot_file_open, | 123 | .open = dquot_file_open, |
92 | .release = ext2_release_file, | 124 | .release = ext2_release_file, |
93 | .fsync = ext2_fsync, | 125 | .fsync = ext2_fsync, |
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 7d66fb0e4cca..6c14bb8322fa 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c | |||
@@ -170,7 +170,7 @@ static void ext2_preread_inode(struct inode *inode) | |||
170 | struct ext2_group_desc * gdp; | 170 | struct ext2_group_desc * gdp; |
171 | struct backing_dev_info *bdi; | 171 | struct backing_dev_info *bdi; |
172 | 172 | ||
173 | bdi = inode->i_mapping->backing_dev_info; | 173 | bdi = inode_to_bdi(inode); |
174 | if (bdi_read_congested(bdi)) | 174 | if (bdi_read_congested(bdi)) |
175 | return; | 175 | return; |
176 | if (bdi_write_congested(bdi)) | 176 | if (bdi_write_congested(bdi)) |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 36d35c36311d..6434bc000125 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -34,7 +34,6 @@ | |||
34 | #include <linux/aio.h> | 34 | #include <linux/aio.h> |
35 | #include "ext2.h" | 35 | #include "ext2.h" |
36 | #include "acl.h" | 36 | #include "acl.h" |
37 | #include "xip.h" | ||
38 | #include "xattr.h" | 37 | #include "xattr.h" |
39 | 38 | ||
40 | static int __ext2_write_inode(struct inode *inode, int do_sync); | 39 | static int __ext2_write_inode(struct inode *inode, int do_sync); |
@@ -731,12 +730,14 @@ static int ext2_get_blocks(struct inode *inode, | |||
731 | goto cleanup; | 730 | goto cleanup; |
732 | } | 731 | } |
733 | 732 | ||
734 | if (ext2_use_xip(inode->i_sb)) { | 733 | if (IS_DAX(inode)) { |
735 | /* | 734 | /* |
736 | * we need to clear the block | 735 | * block must be initialised before we put it in the tree |
736 | * so that it's not found by another thread before it's | ||
737 | * initialised | ||
737 | */ | 738 | */ |
738 | err = ext2_clear_xip_target (inode, | 739 | err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key), |
739 | le32_to_cpu(chain[depth-1].key)); | 740 | 1 << inode->i_blkbits); |
740 | if (err) { | 741 | if (err) { |
741 | mutex_unlock(&ei->truncate_mutex); | 742 | mutex_unlock(&ei->truncate_mutex); |
742 | goto cleanup; | 743 | goto cleanup; |
@@ -859,7 +860,12 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, | |||
859 | size_t count = iov_iter_count(iter); | 860 | size_t count = iov_iter_count(iter); |
860 | ssize_t ret; | 861 | ssize_t ret; |
861 | 862 | ||
862 | ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext2_get_block); | 863 | if (IS_DAX(inode)) |
864 | ret = dax_do_io(rw, iocb, inode, iter, offset, ext2_get_block, | ||
865 | NULL, DIO_LOCKING); | ||
866 | else | ||
867 | ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, | ||
868 | ext2_get_block); | ||
863 | if (ret < 0 && (rw & WRITE)) | 869 | if (ret < 0 && (rw & WRITE)) |
864 | ext2_write_failed(mapping, offset + count); | 870 | ext2_write_failed(mapping, offset + count); |
865 | return ret; | 871 | return ret; |
@@ -885,11 +891,6 @@ const struct address_space_operations ext2_aops = { | |||
885 | .error_remove_page = generic_error_remove_page, | 891 | .error_remove_page = generic_error_remove_page, |
886 | }; | 892 | }; |
887 | 893 | ||
888 | const struct address_space_operations ext2_aops_xip = { | ||
889 | .bmap = ext2_bmap, | ||
890 | .get_xip_mem = ext2_get_xip_mem, | ||
891 | }; | ||
892 | |||
893 | const struct address_space_operations ext2_nobh_aops = { | 894 | const struct address_space_operations ext2_nobh_aops = { |
894 | .readpage = ext2_readpage, | 895 | .readpage = ext2_readpage, |
895 | .readpages = ext2_readpages, | 896 | .readpages = ext2_readpages, |
@@ -1201,8 +1202,8 @@ static int ext2_setsize(struct inode *inode, loff_t newsize) | |||
1201 | 1202 | ||
1202 | inode_dio_wait(inode); | 1203 | inode_dio_wait(inode); |
1203 | 1204 | ||
1204 | if (mapping_is_xip(inode->i_mapping)) | 1205 | if (IS_DAX(inode)) |
1205 | error = xip_truncate_page(inode->i_mapping, newsize); | 1206 | error = dax_truncate_page(inode, newsize, ext2_get_block); |
1206 | else if (test_opt(inode->i_sb, NOBH)) | 1207 | else if (test_opt(inode->i_sb, NOBH)) |
1207 | error = nobh_truncate_page(inode->i_mapping, | 1208 | error = nobh_truncate_page(inode->i_mapping, |
1208 | newsize, ext2_get_block); | 1209 | newsize, ext2_get_block); |
@@ -1273,7 +1274,8 @@ void ext2_set_inode_flags(struct inode *inode) | |||
1273 | { | 1274 | { |
1274 | unsigned int flags = EXT2_I(inode)->i_flags; | 1275 | unsigned int flags = EXT2_I(inode)->i_flags; |
1275 | 1276 | ||
1276 | inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); | 1277 | inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | |
1278 | S_DIRSYNC | S_DAX); | ||
1277 | if (flags & EXT2_SYNC_FL) | 1279 | if (flags & EXT2_SYNC_FL) |
1278 | inode->i_flags |= S_SYNC; | 1280 | inode->i_flags |= S_SYNC; |
1279 | if (flags & EXT2_APPEND_FL) | 1281 | if (flags & EXT2_APPEND_FL) |
@@ -1284,6 +1286,8 @@ void ext2_set_inode_flags(struct inode *inode) | |||
1284 | inode->i_flags |= S_NOATIME; | 1286 | inode->i_flags |= S_NOATIME; |
1285 | if (flags & EXT2_DIRSYNC_FL) | 1287 | if (flags & EXT2_DIRSYNC_FL) |
1286 | inode->i_flags |= S_DIRSYNC; | 1288 | inode->i_flags |= S_DIRSYNC; |
1289 | if (test_opt(inode->i_sb, DAX)) | ||
1290 | inode->i_flags |= S_DAX; | ||
1287 | } | 1291 | } |
1288 | 1292 | ||
1289 | /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */ | 1293 | /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */ |
@@ -1384,9 +1388,9 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) | |||
1384 | 1388 | ||
1385 | if (S_ISREG(inode->i_mode)) { | 1389 | if (S_ISREG(inode->i_mode)) { |
1386 | inode->i_op = &ext2_file_inode_operations; | 1390 | inode->i_op = &ext2_file_inode_operations; |
1387 | if (ext2_use_xip(inode->i_sb)) { | 1391 | if (test_opt(inode->i_sb, DAX)) { |
1388 | inode->i_mapping->a_ops = &ext2_aops_xip; | 1392 | inode->i_mapping->a_ops = &ext2_aops; |
1389 | inode->i_fop = &ext2_xip_file_operations; | 1393 | inode->i_fop = &ext2_dax_file_operations; |
1390 | } else if (test_opt(inode->i_sb, NOBH)) { | 1394 | } else if (test_opt(inode->i_sb, NOBH)) { |
1391 | inode->i_mapping->a_ops = &ext2_nobh_aops; | 1395 | inode->i_mapping->a_ops = &ext2_nobh_aops; |
1392 | inode->i_fop = &ext2_file_operations; | 1396 | inode->i_fop = &ext2_file_operations; |
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index c268d0af1db9..148f6e3789ea 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c | |||
@@ -35,7 +35,6 @@ | |||
35 | #include "ext2.h" | 35 | #include "ext2.h" |
36 | #include "xattr.h" | 36 | #include "xattr.h" |
37 | #include "acl.h" | 37 | #include "acl.h" |
38 | #include "xip.h" | ||
39 | 38 | ||
40 | static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) | 39 | static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) |
41 | { | 40 | { |
@@ -105,9 +104,9 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode | |||
105 | return PTR_ERR(inode); | 104 | return PTR_ERR(inode); |
106 | 105 | ||
107 | inode->i_op = &ext2_file_inode_operations; | 106 | inode->i_op = &ext2_file_inode_operations; |
108 | if (ext2_use_xip(inode->i_sb)) { | 107 | if (test_opt(inode->i_sb, DAX)) { |
109 | inode->i_mapping->a_ops = &ext2_aops_xip; | 108 | inode->i_mapping->a_ops = &ext2_aops; |
110 | inode->i_fop = &ext2_xip_file_operations; | 109 | inode->i_fop = &ext2_dax_file_operations; |
111 | } else if (test_opt(inode->i_sb, NOBH)) { | 110 | } else if (test_opt(inode->i_sb, NOBH)) { |
112 | inode->i_mapping->a_ops = &ext2_nobh_aops; | 111 | inode->i_mapping->a_ops = &ext2_nobh_aops; |
113 | inode->i_fop = &ext2_file_operations; | 112 | inode->i_fop = &ext2_file_operations; |
@@ -126,9 +125,9 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
126 | return PTR_ERR(inode); | 125 | return PTR_ERR(inode); |
127 | 126 | ||
128 | inode->i_op = &ext2_file_inode_operations; | 127 | inode->i_op = &ext2_file_inode_operations; |
129 | if (ext2_use_xip(inode->i_sb)) { | 128 | if (test_opt(inode->i_sb, DAX)) { |
130 | inode->i_mapping->a_ops = &ext2_aops_xip; | 129 | inode->i_mapping->a_ops = &ext2_aops; |
131 | inode->i_fop = &ext2_xip_file_operations; | 130 | inode->i_fop = &ext2_dax_file_operations; |
132 | } else if (test_opt(inode->i_sb, NOBH)) { | 131 | } else if (test_opt(inode->i_sb, NOBH)) { |
133 | inode->i_mapping->a_ops = &ext2_nobh_aops; | 132 | inode->i_mapping->a_ops = &ext2_nobh_aops; |
134 | inode->i_fop = &ext2_file_operations; | 133 | inode->i_fop = &ext2_file_operations; |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index ae55fddc26a9..d0e746e96511 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -35,7 +35,6 @@ | |||
35 | #include "ext2.h" | 35 | #include "ext2.h" |
36 | #include "xattr.h" | 36 | #include "xattr.h" |
37 | #include "acl.h" | 37 | #include "acl.h" |
38 | #include "xip.h" | ||
39 | 38 | ||
40 | static void ext2_sync_super(struct super_block *sb, | 39 | static void ext2_sync_super(struct super_block *sb, |
41 | struct ext2_super_block *es, int wait); | 40 | struct ext2_super_block *es, int wait); |
@@ -292,9 +291,11 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root) | |||
292 | seq_puts(seq, ",grpquota"); | 291 | seq_puts(seq, ",grpquota"); |
293 | #endif | 292 | #endif |
294 | 293 | ||
295 | #if defined(CONFIG_EXT2_FS_XIP) | 294 | #ifdef CONFIG_FS_DAX |
296 | if (sbi->s_mount_opt & EXT2_MOUNT_XIP) | 295 | if (sbi->s_mount_opt & EXT2_MOUNT_XIP) |
297 | seq_puts(seq, ",xip"); | 296 | seq_puts(seq, ",xip"); |
297 | if (sbi->s_mount_opt & EXT2_MOUNT_DAX) | ||
298 | seq_puts(seq, ",dax"); | ||
298 | #endif | 299 | #endif |
299 | 300 | ||
300 | if (!test_opt(sb, RESERVATION)) | 301 | if (!test_opt(sb, RESERVATION)) |
@@ -403,7 +404,7 @@ enum { | |||
403 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, | 404 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, |
404 | Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, | 405 | Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, |
405 | Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, | 406 | Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, |
406 | Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, | 407 | Opt_acl, Opt_noacl, Opt_xip, Opt_dax, Opt_ignore, Opt_err, Opt_quota, |
407 | Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation | 408 | Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation |
408 | }; | 409 | }; |
409 | 410 | ||
@@ -432,6 +433,7 @@ static const match_table_t tokens = { | |||
432 | {Opt_acl, "acl"}, | 433 | {Opt_acl, "acl"}, |
433 | {Opt_noacl, "noacl"}, | 434 | {Opt_noacl, "noacl"}, |
434 | {Opt_xip, "xip"}, | 435 | {Opt_xip, "xip"}, |
436 | {Opt_dax, "dax"}, | ||
435 | {Opt_grpquota, "grpquota"}, | 437 | {Opt_grpquota, "grpquota"}, |
436 | {Opt_ignore, "noquota"}, | 438 | {Opt_ignore, "noquota"}, |
437 | {Opt_quota, "quota"}, | 439 | {Opt_quota, "quota"}, |
@@ -559,10 +561,14 @@ static int parse_options(char *options, struct super_block *sb) | |||
559 | break; | 561 | break; |
560 | #endif | 562 | #endif |
561 | case Opt_xip: | 563 | case Opt_xip: |
562 | #ifdef CONFIG_EXT2_FS_XIP | 564 | ext2_msg(sb, KERN_INFO, "use dax instead of xip"); |
563 | set_opt (sbi->s_mount_opt, XIP); | 565 | set_opt(sbi->s_mount_opt, XIP); |
566 | /* Fall through */ | ||
567 | case Opt_dax: | ||
568 | #ifdef CONFIG_FS_DAX | ||
569 | set_opt(sbi->s_mount_opt, DAX); | ||
564 | #else | 570 | #else |
565 | ext2_msg(sb, KERN_INFO, "xip option not supported"); | 571 | ext2_msg(sb, KERN_INFO, "dax option not supported"); |
566 | #endif | 572 | #endif |
567 | break; | 573 | break; |
568 | 574 | ||
@@ -877,9 +883,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) | |||
877 | ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? | 883 | ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? |
878 | MS_POSIXACL : 0); | 884 | MS_POSIXACL : 0); |
879 | 885 | ||
880 | ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset | ||
881 | EXT2_MOUNT_XIP if not */ | ||
882 | |||
883 | if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && | 886 | if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && |
884 | (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) || | 887 | (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) || |
885 | EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) || | 888 | EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) || |
@@ -909,11 +912,17 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) | |||
909 | 912 | ||
910 | blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); | 913 | blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); |
911 | 914 | ||
912 | if (ext2_use_xip(sb) && blocksize != PAGE_SIZE) { | 915 | if (sbi->s_mount_opt & EXT2_MOUNT_DAX) { |
913 | if (!silent) | 916 | if (blocksize != PAGE_SIZE) { |
914 | ext2_msg(sb, KERN_ERR, | 917 | ext2_msg(sb, KERN_ERR, |
915 | "error: unsupported blocksize for xip"); | 918 | "error: unsupported blocksize for dax"); |
916 | goto failed_mount; | 919 | goto failed_mount; |
920 | } | ||
921 | if (!sb->s_bdev->bd_disk->fops->direct_access) { | ||
922 | ext2_msg(sb, KERN_ERR, | ||
923 | "error: device does not support dax"); | ||
924 | goto failed_mount; | ||
925 | } | ||
917 | } | 926 | } |
918 | 927 | ||
919 | /* If the blocksize doesn't match, re-read the thing.. */ | 928 | /* If the blocksize doesn't match, re-read the thing.. */ |
@@ -1259,7 +1268,6 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
1259 | { | 1268 | { |
1260 | struct ext2_sb_info * sbi = EXT2_SB(sb); | 1269 | struct ext2_sb_info * sbi = EXT2_SB(sb); |
1261 | struct ext2_super_block * es; | 1270 | struct ext2_super_block * es; |
1262 | unsigned long old_mount_opt = sbi->s_mount_opt; | ||
1263 | struct ext2_mount_options old_opts; | 1271 | struct ext2_mount_options old_opts; |
1264 | unsigned long old_sb_flags; | 1272 | unsigned long old_sb_flags; |
1265 | int err; | 1273 | int err; |
@@ -1284,22 +1292,11 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
1284 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | 1292 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | |
1285 | ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); | 1293 | ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); |
1286 | 1294 | ||
1287 | ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset | ||
1288 | EXT2_MOUNT_XIP if not */ | ||
1289 | |||
1290 | if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) { | ||
1291 | ext2_msg(sb, KERN_WARNING, | ||
1292 | "warning: unsupported blocksize for xip"); | ||
1293 | err = -EINVAL; | ||
1294 | goto restore_opts; | ||
1295 | } | ||
1296 | |||
1297 | es = sbi->s_es; | 1295 | es = sbi->s_es; |
1298 | if ((sbi->s_mount_opt ^ old_mount_opt) & EXT2_MOUNT_XIP) { | 1296 | if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT2_MOUNT_DAX) { |
1299 | ext2_msg(sb, KERN_WARNING, "warning: refusing change of " | 1297 | ext2_msg(sb, KERN_WARNING, "warning: refusing change of " |
1300 | "xip flag with busy inodes while remounting"); | 1298 | "dax flag with busy inodes while remounting"); |
1301 | sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; | 1299 | sbi->s_mount_opt ^= EXT2_MOUNT_DAX; |
1302 | sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; | ||
1303 | } | 1300 | } |
1304 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { | 1301 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { |
1305 | spin_unlock(&sbi->s_lock); | 1302 | spin_unlock(&sbi->s_lock); |
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c deleted file mode 100644 index e98171a11cfe..000000000000 --- a/fs/ext2/xip.c +++ /dev/null | |||
@@ -1,91 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/xip.c | ||
3 | * | ||
4 | * Copyright (C) 2005 IBM Corporation | ||
5 | * Author: Carsten Otte (cotte@de.ibm.com) | ||
6 | */ | ||
7 | |||
8 | #include <linux/mm.h> | ||
9 | #include <linux/fs.h> | ||
10 | #include <linux/genhd.h> | ||
11 | #include <linux/buffer_head.h> | ||
12 | #include <linux/blkdev.h> | ||
13 | #include "ext2.h" | ||
14 | #include "xip.h" | ||
15 | |||
16 | static inline int | ||
17 | __inode_direct_access(struct inode *inode, sector_t block, | ||
18 | void **kaddr, unsigned long *pfn) | ||
19 | { | ||
20 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
21 | const struct block_device_operations *ops = bdev->bd_disk->fops; | ||
22 | sector_t sector; | ||
23 | |||
24 | sector = block * (PAGE_SIZE / 512); /* ext2 block to bdev sector */ | ||
25 | |||
26 | BUG_ON(!ops->direct_access); | ||
27 | return ops->direct_access(bdev, sector, kaddr, pfn); | ||
28 | } | ||
29 | |||
30 | static inline int | ||
31 | __ext2_get_block(struct inode *inode, pgoff_t pgoff, int create, | ||
32 | sector_t *result) | ||
33 | { | ||
34 | struct buffer_head tmp; | ||
35 | int rc; | ||
36 | |||
37 | memset(&tmp, 0, sizeof(struct buffer_head)); | ||
38 | tmp.b_size = 1 << inode->i_blkbits; | ||
39 | rc = ext2_get_block(inode, pgoff, &tmp, create); | ||
40 | *result = tmp.b_blocknr; | ||
41 | |||
42 | /* did we get a sparse block (hole in the file)? */ | ||
43 | if (!tmp.b_blocknr && !rc) { | ||
44 | BUG_ON(create); | ||
45 | rc = -ENODATA; | ||
46 | } | ||
47 | |||
48 | return rc; | ||
49 | } | ||
50 | |||
51 | int | ||
52 | ext2_clear_xip_target(struct inode *inode, sector_t block) | ||
53 | { | ||
54 | void *kaddr; | ||
55 | unsigned long pfn; | ||
56 | int rc; | ||
57 | |||
58 | rc = __inode_direct_access(inode, block, &kaddr, &pfn); | ||
59 | if (!rc) | ||
60 | clear_page(kaddr); | ||
61 | return rc; | ||
62 | } | ||
63 | |||
64 | void ext2_xip_verify_sb(struct super_block *sb) | ||
65 | { | ||
66 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
67 | |||
68 | if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) && | ||
69 | !sb->s_bdev->bd_disk->fops->direct_access) { | ||
70 | sbi->s_mount_opt &= (~EXT2_MOUNT_XIP); | ||
71 | ext2_msg(sb, KERN_WARNING, | ||
72 | "warning: ignoring xip option - " | ||
73 | "not supported by bdev"); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | int ext2_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, int create, | ||
78 | void **kmem, unsigned long *pfn) | ||
79 | { | ||
80 | int rc; | ||
81 | sector_t block; | ||
82 | |||
83 | /* first, retrieve the sector number */ | ||
84 | rc = __ext2_get_block(mapping->host, pgoff, create, &block); | ||
85 | if (rc) | ||
86 | return rc; | ||
87 | |||
88 | /* retrieve address of the target data */ | ||
89 | rc = __inode_direct_access(mapping->host, block, kmem, pfn); | ||
90 | return rc; | ||
91 | } | ||
diff --git a/fs/ext2/xip.h b/fs/ext2/xip.h deleted file mode 100644 index 18b34d2f31b3..000000000000 --- a/fs/ext2/xip.h +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/ext2/xip.h | ||
3 | * | ||
4 | * Copyright (C) 2005 IBM Corporation | ||
5 | * Author: Carsten Otte (cotte@de.ibm.com) | ||
6 | */ | ||
7 | |||
8 | #ifdef CONFIG_EXT2_FS_XIP | ||
9 | extern void ext2_xip_verify_sb (struct super_block *); | ||
10 | extern int ext2_clear_xip_target (struct inode *, sector_t); | ||
11 | |||
12 | static inline int ext2_use_xip (struct super_block *sb) | ||
13 | { | ||
14 | struct ext2_sb_info *sbi = EXT2_SB(sb); | ||
15 | return (sbi->s_mount_opt & EXT2_MOUNT_XIP); | ||
16 | } | ||
17 | int ext2_get_xip_mem(struct address_space *, pgoff_t, int, | ||
18 | void **, unsigned long *); | ||
19 | #define mapping_is_xip(map) unlikely(map->a_ops->get_xip_mem) | ||
20 | #else | ||
21 | #define mapping_is_xip(map) 0 | ||
22 | #define ext2_xip_verify_sb(sb) do { } while (0) | ||
23 | #define ext2_use_xip(sb) 0 | ||
24 | #define ext2_clear_xip_target(inode, chain) 0 | ||
25 | #define ext2_get_xip_mem NULL | ||
26 | #endif | ||
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 9b4e7d750d4f..d4dbf3c259b3 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -466,6 +466,8 @@ static void ext3_put_super (struct super_block * sb) | |||
466 | } | 466 | } |
467 | sb->s_fs_info = NULL; | 467 | sb->s_fs_info = NULL; |
468 | kfree(sbi->s_blockgroup_lock); | 468 | kfree(sbi->s_blockgroup_lock); |
469 | mutex_destroy(&sbi->s_orphan_lock); | ||
470 | mutex_destroy(&sbi->s_resize_lock); | ||
469 | kfree(sbi); | 471 | kfree(sbi); |
470 | } | 472 | } |
471 | 473 | ||
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a75fba67bb1f..982d934fd9ac 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -965,6 +965,11 @@ struct ext4_inode_info { | |||
965 | #define EXT4_MOUNT_ERRORS_MASK 0x00070 | 965 | #define EXT4_MOUNT_ERRORS_MASK 0x00070 |
966 | #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ | 966 | #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ |
967 | #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ | 967 | #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ |
968 | #ifdef CONFIG_FS_DAX | ||
969 | #define EXT4_MOUNT_DAX 0x00200 /* Direct Access */ | ||
970 | #else | ||
971 | #define EXT4_MOUNT_DAX 0 | ||
972 | #endif | ||
968 | #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ | 973 | #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ |
969 | #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ | 974 | #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ |
970 | #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ | 975 | #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ |
@@ -2578,6 +2583,7 @@ extern const struct file_operations ext4_dir_operations; | |||
2578 | /* file.c */ | 2583 | /* file.c */ |
2579 | extern const struct inode_operations ext4_file_inode_operations; | 2584 | extern const struct inode_operations ext4_file_inode_operations; |
2580 | extern const struct file_operations ext4_file_operations; | 2585 | extern const struct file_operations ext4_file_operations; |
2586 | extern const struct file_operations ext4_dax_file_operations; | ||
2581 | extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); | 2587 | extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); |
2582 | 2588 | ||
2583 | /* inline.c */ | 2589 | /* inline.c */ |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 8131be8c0af3..33a09da16c9c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -95,7 +95,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
95 | struct inode *inode = file_inode(iocb->ki_filp); | 95 | struct inode *inode = file_inode(iocb->ki_filp); |
96 | struct mutex *aio_mutex = NULL; | 96 | struct mutex *aio_mutex = NULL; |
97 | struct blk_plug plug; | 97 | struct blk_plug plug; |
98 | int o_direct = file->f_flags & O_DIRECT; | 98 | int o_direct = io_is_direct(file); |
99 | int overwrite = 0; | 99 | int overwrite = 0; |
100 | size_t length = iov_iter_count(from); | 100 | size_t length = iov_iter_count(from); |
101 | ssize_t ret; | 101 | ssize_t ret; |
@@ -191,17 +191,41 @@ errout: | |||
191 | return ret; | 191 | return ret; |
192 | } | 192 | } |
193 | 193 | ||
194 | #ifdef CONFIG_FS_DAX | ||
195 | static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
196 | { | ||
197 | return dax_fault(vma, vmf, ext4_get_block); | ||
198 | /* Is this the right get_block? */ | ||
199 | } | ||
200 | |||
201 | static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
202 | { | ||
203 | return dax_mkwrite(vma, vmf, ext4_get_block); | ||
204 | } | ||
205 | |||
206 | static const struct vm_operations_struct ext4_dax_vm_ops = { | ||
207 | .fault = ext4_dax_fault, | ||
208 | .page_mkwrite = ext4_dax_mkwrite, | ||
209 | }; | ||
210 | #else | ||
211 | #define ext4_dax_vm_ops ext4_file_vm_ops | ||
212 | #endif | ||
213 | |||
194 | static const struct vm_operations_struct ext4_file_vm_ops = { | 214 | static const struct vm_operations_struct ext4_file_vm_ops = { |
195 | .fault = filemap_fault, | 215 | .fault = filemap_fault, |
196 | .map_pages = filemap_map_pages, | 216 | .map_pages = filemap_map_pages, |
197 | .page_mkwrite = ext4_page_mkwrite, | 217 | .page_mkwrite = ext4_page_mkwrite, |
198 | .remap_pages = generic_file_remap_pages, | ||
199 | }; | 218 | }; |
200 | 219 | ||
201 | static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | 220 | static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) |
202 | { | 221 | { |
203 | file_accessed(file); | 222 | file_accessed(file); |
204 | vma->vm_ops = &ext4_file_vm_ops; | 223 | if (IS_DAX(file_inode(file))) { |
224 | vma->vm_ops = &ext4_dax_vm_ops; | ||
225 | vma->vm_flags |= VM_MIXEDMAP; | ||
226 | } else { | ||
227 | vma->vm_ops = &ext4_file_vm_ops; | ||
228 | } | ||
205 | return 0; | 229 | return 0; |
206 | } | 230 | } |
207 | 231 | ||
@@ -600,6 +624,26 @@ const struct file_operations ext4_file_operations = { | |||
600 | .fallocate = ext4_fallocate, | 624 | .fallocate = ext4_fallocate, |
601 | }; | 625 | }; |
602 | 626 | ||
627 | #ifdef CONFIG_FS_DAX | ||
628 | const struct file_operations ext4_dax_file_operations = { | ||
629 | .llseek = ext4_llseek, | ||
630 | .read = new_sync_read, | ||
631 | .write = new_sync_write, | ||
632 | .read_iter = generic_file_read_iter, | ||
633 | .write_iter = ext4_file_write_iter, | ||
634 | .unlocked_ioctl = ext4_ioctl, | ||
635 | #ifdef CONFIG_COMPAT | ||
636 | .compat_ioctl = ext4_compat_ioctl, | ||
637 | #endif | ||
638 | .mmap = ext4_file_mmap, | ||
639 | .open = ext4_file_open, | ||
640 | .release = ext4_release_file, | ||
641 | .fsync = ext4_sync_file, | ||
642 | /* Splice not yet supported with DAX */ | ||
643 | .fallocate = ext4_fallocate, | ||
644 | }; | ||
645 | #endif | ||
646 | |||
603 | const struct inode_operations ext4_file_inode_operations = { | 647 | const struct inode_operations ext4_file_inode_operations = { |
604 | .setattr = ext4_setattr, | 648 | .setattr = ext4_setattr, |
605 | .getattr = ext4_getattr, | 649 | .getattr = ext4_getattr, |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 36b369697a13..6b9878a24182 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -689,14 +689,22 @@ retry: | |||
689 | inode_dio_done(inode); | 689 | inode_dio_done(inode); |
690 | goto locked; | 690 | goto locked; |
691 | } | 691 | } |
692 | ret = __blockdev_direct_IO(rw, iocb, inode, | 692 | if (IS_DAX(inode)) |
693 | inode->i_sb->s_bdev, iter, offset, | 693 | ret = dax_do_io(rw, iocb, inode, iter, offset, |
694 | ext4_get_block, NULL, NULL, 0); | 694 | ext4_get_block, NULL, 0); |
695 | else | ||
696 | ret = __blockdev_direct_IO(rw, iocb, inode, | ||
697 | inode->i_sb->s_bdev, iter, offset, | ||
698 | ext4_get_block, NULL, NULL, 0); | ||
695 | inode_dio_done(inode); | 699 | inode_dio_done(inode); |
696 | } else { | 700 | } else { |
697 | locked: | 701 | locked: |
698 | ret = blockdev_direct_IO(rw, iocb, inode, iter, | 702 | if (IS_DAX(inode)) |
699 | offset, ext4_get_block); | 703 | ret = dax_do_io(rw, iocb, inode, iter, offset, |
704 | ext4_get_block, NULL, DIO_LOCKING); | ||
705 | else | ||
706 | ret = blockdev_direct_IO(rw, iocb, inode, iter, | ||
707 | offset, ext4_get_block); | ||
700 | 708 | ||
701 | if (unlikely((rw & WRITE) && ret < 0)) { | 709 | if (unlikely((rw & WRITE) && ret < 0)) { |
702 | loff_t isize = i_size_read(inode); | 710 | loff_t isize = i_size_read(inode); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5653fa42930b..85404f15e53a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -657,6 +657,18 @@ has_zeroout: | |||
657 | return retval; | 657 | return retval; |
658 | } | 658 | } |
659 | 659 | ||
660 | static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate) | ||
661 | { | ||
662 | struct inode *inode = bh->b_assoc_map->host; | ||
663 | /* XXX: breaks on 32-bit > 16GB. Is that even supported? */ | ||
664 | loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits; | ||
665 | int err; | ||
666 | if (!uptodate) | ||
667 | return; | ||
668 | WARN_ON(!buffer_unwritten(bh)); | ||
669 | err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size); | ||
670 | } | ||
671 | |||
660 | /* Maximum number of blocks we map for direct IO at once. */ | 672 | /* Maximum number of blocks we map for direct IO at once. */ |
661 | #define DIO_MAX_BLOCKS 4096 | 673 | #define DIO_MAX_BLOCKS 4096 |
662 | 674 | ||
@@ -694,6 +706,11 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, | |||
694 | 706 | ||
695 | map_bh(bh, inode->i_sb, map.m_pblk); | 707 | map_bh(bh, inode->i_sb, map.m_pblk); |
696 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 708 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; |
709 | if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) { | ||
710 | bh->b_assoc_map = inode->i_mapping; | ||
711 | bh->b_private = (void *)(unsigned long)iblock; | ||
712 | bh->b_end_io = ext4_end_io_unwritten; | ||
713 | } | ||
697 | if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) | 714 | if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) |
698 | set_buffer_defer_completion(bh); | 715 | set_buffer_defer_completion(bh); |
699 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; | 716 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; |
@@ -3010,13 +3027,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3010 | get_block_func = ext4_get_block_write; | 3027 | get_block_func = ext4_get_block_write; |
3011 | dio_flags = DIO_LOCKING; | 3028 | dio_flags = DIO_LOCKING; |
3012 | } | 3029 | } |
3013 | ret = __blockdev_direct_IO(rw, iocb, inode, | 3030 | if (IS_DAX(inode)) |
3014 | inode->i_sb->s_bdev, iter, | 3031 | ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func, |
3015 | offset, | 3032 | ext4_end_io_dio, dio_flags); |
3016 | get_block_func, | 3033 | else |
3017 | ext4_end_io_dio, | 3034 | ret = __blockdev_direct_IO(rw, iocb, inode, |
3018 | NULL, | 3035 | inode->i_sb->s_bdev, iter, offset, |
3019 | dio_flags); | 3036 | get_block_func, |
3037 | ext4_end_io_dio, NULL, dio_flags); | ||
3020 | 3038 | ||
3021 | /* | 3039 | /* |
3022 | * Put our reference to io_end. This can free the io_end structure e.g. | 3040 | * Put our reference to io_end. This can free the io_end structure e.g. |
@@ -3180,19 +3198,12 @@ void ext4_set_aops(struct inode *inode) | |||
3180 | inode->i_mapping->a_ops = &ext4_aops; | 3198 | inode->i_mapping->a_ops = &ext4_aops; |
3181 | } | 3199 | } |
3182 | 3200 | ||
3183 | /* | 3201 | static int __ext4_block_zero_page_range(handle_t *handle, |
3184 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' | ||
3185 | * starting from file offset 'from'. The range to be zero'd must | ||
3186 | * be contained with in one block. If the specified range exceeds | ||
3187 | * the end of the block it will be shortened to end of the block | ||
3188 | * that cooresponds to 'from' | ||
3189 | */ | ||
3190 | static int ext4_block_zero_page_range(handle_t *handle, | ||
3191 | struct address_space *mapping, loff_t from, loff_t length) | 3202 | struct address_space *mapping, loff_t from, loff_t length) |
3192 | { | 3203 | { |
3193 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 3204 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
3194 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 3205 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
3195 | unsigned blocksize, max, pos; | 3206 | unsigned blocksize, pos; |
3196 | ext4_lblk_t iblock; | 3207 | ext4_lblk_t iblock; |
3197 | struct inode *inode = mapping->host; | 3208 | struct inode *inode = mapping->host; |
3198 | struct buffer_head *bh; | 3209 | struct buffer_head *bh; |
@@ -3205,14 +3216,6 @@ static int ext4_block_zero_page_range(handle_t *handle, | |||
3205 | return -ENOMEM; | 3216 | return -ENOMEM; |
3206 | 3217 | ||
3207 | blocksize = inode->i_sb->s_blocksize; | 3218 | blocksize = inode->i_sb->s_blocksize; |
3208 | max = blocksize - (offset & (blocksize - 1)); | ||
3209 | |||
3210 | /* | ||
3211 | * correct length if it does not fall between | ||
3212 | * 'from' and the end of the block | ||
3213 | */ | ||
3214 | if (length > max || length < 0) | ||
3215 | length = max; | ||
3216 | 3219 | ||
3217 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 3220 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
3218 | 3221 | ||
@@ -3278,6 +3281,33 @@ unlock: | |||
3278 | } | 3281 | } |
3279 | 3282 | ||
3280 | /* | 3283 | /* |
3284 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' | ||
3285 | * starting from file offset 'from'. The range to be zero'd must | ||
3286 | * be contained with in one block. If the specified range exceeds | ||
3287 | * the end of the block it will be shortened to end of the block | ||
3288 | * that cooresponds to 'from' | ||
3289 | */ | ||
3290 | static int ext4_block_zero_page_range(handle_t *handle, | ||
3291 | struct address_space *mapping, loff_t from, loff_t length) | ||
3292 | { | ||
3293 | struct inode *inode = mapping->host; | ||
3294 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
3295 | unsigned blocksize = inode->i_sb->s_blocksize; | ||
3296 | unsigned max = blocksize - (offset & (blocksize - 1)); | ||
3297 | |||
3298 | /* | ||
3299 | * correct length if it does not fall between | ||
3300 | * 'from' and the end of the block | ||
3301 | */ | ||
3302 | if (length > max || length < 0) | ||
3303 | length = max; | ||
3304 | |||
3305 | if (IS_DAX(inode)) | ||
3306 | return dax_zero_page_range(inode, from, length, ext4_get_block); | ||
3307 | return __ext4_block_zero_page_range(handle, mapping, from, length); | ||
3308 | } | ||
3309 | |||
3310 | /* | ||
3281 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' | 3311 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' |
3282 | * up to the end of the block which corresponds to `from'. | 3312 | * up to the end of the block which corresponds to `from'. |
3283 | * This required during truncate. We need to physically zero the tail end | 3313 | * This required during truncate. We need to physically zero the tail end |
@@ -3798,8 +3828,10 @@ void ext4_set_inode_flags(struct inode *inode) | |||
3798 | new_fl |= S_NOATIME; | 3828 | new_fl |= S_NOATIME; |
3799 | if (flags & EXT4_DIRSYNC_FL) | 3829 | if (flags & EXT4_DIRSYNC_FL) |
3800 | new_fl |= S_DIRSYNC; | 3830 | new_fl |= S_DIRSYNC; |
3831 | if (test_opt(inode->i_sb, DAX)) | ||
3832 | new_fl |= S_DAX; | ||
3801 | inode_set_flags(inode, new_fl, | 3833 | inode_set_flags(inode, new_fl, |
3802 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); | 3834 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); |
3803 | } | 3835 | } |
3804 | 3836 | ||
3805 | /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ | 3837 | /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ |
@@ -4052,7 +4084,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4052 | 4084 | ||
4053 | if (S_ISREG(inode->i_mode)) { | 4085 | if (S_ISREG(inode->i_mode)) { |
4054 | inode->i_op = &ext4_file_inode_operations; | 4086 | inode->i_op = &ext4_file_inode_operations; |
4055 | inode->i_fop = &ext4_file_operations; | 4087 | if (test_opt(inode->i_sb, DAX)) |
4088 | inode->i_fop = &ext4_dax_file_operations; | ||
4089 | else | ||
4090 | inode->i_fop = &ext4_file_operations; | ||
4056 | ext4_set_aops(inode); | 4091 | ext4_set_aops(inode); |
4057 | } else if (S_ISDIR(inode->i_mode)) { | 4092 | } else if (S_ISDIR(inode->i_mode)) { |
4058 | inode->i_op = &ext4_dir_inode_operations; | 4093 | inode->i_op = &ext4_dir_inode_operations; |
@@ -4139,6 +4174,65 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
4139 | return 0; | 4174 | return 0; |
4140 | } | 4175 | } |
4141 | 4176 | ||
4177 | struct other_inode { | ||
4178 | unsigned long orig_ino; | ||
4179 | struct ext4_inode *raw_inode; | ||
4180 | }; | ||
4181 | |||
4182 | static int other_inode_match(struct inode * inode, unsigned long ino, | ||
4183 | void *data) | ||
4184 | { | ||
4185 | struct other_inode *oi = (struct other_inode *) data; | ||
4186 | |||
4187 | if ((inode->i_ino != ino) || | ||
4188 | (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | | ||
4189 | I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || | ||
4190 | ((inode->i_state & I_DIRTY_TIME) == 0)) | ||
4191 | return 0; | ||
4192 | spin_lock(&inode->i_lock); | ||
4193 | if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | | ||
4194 | I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) && | ||
4195 | (inode->i_state & I_DIRTY_TIME)) { | ||
4196 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
4197 | |||
4198 | inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED); | ||
4199 | spin_unlock(&inode->i_lock); | ||
4200 | |||
4201 | spin_lock(&ei->i_raw_lock); | ||
4202 | EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode); | ||
4203 | EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode); | ||
4204 | EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode); | ||
4205 | ext4_inode_csum_set(inode, oi->raw_inode, ei); | ||
4206 | spin_unlock(&ei->i_raw_lock); | ||
4207 | trace_ext4_other_inode_update_time(inode, oi->orig_ino); | ||
4208 | return -1; | ||
4209 | } | ||
4210 | spin_unlock(&inode->i_lock); | ||
4211 | return -1; | ||
4212 | } | ||
4213 | |||
4214 | /* | ||
4215 | * Opportunistically update the other time fields for other inodes in | ||
4216 | * the same inode table block. | ||
4217 | */ | ||
4218 | static void ext4_update_other_inodes_time(struct super_block *sb, | ||
4219 | unsigned long orig_ino, char *buf) | ||
4220 | { | ||
4221 | struct other_inode oi; | ||
4222 | unsigned long ino; | ||
4223 | int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; | ||
4224 | int inode_size = EXT4_INODE_SIZE(sb); | ||
4225 | |||
4226 | oi.orig_ino = orig_ino; | ||
4227 | ino = orig_ino & ~(inodes_per_block - 1); | ||
4228 | for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { | ||
4229 | if (ino == orig_ino) | ||
4230 | continue; | ||
4231 | oi.raw_inode = (struct ext4_inode *) buf; | ||
4232 | (void) find_inode_nowait(sb, ino, other_inode_match, &oi); | ||
4233 | } | ||
4234 | } | ||
4235 | |||
4142 | /* | 4236 | /* |
4143 | * Post the struct inode info into an on-disk inode location in the | 4237 | * Post the struct inode info into an on-disk inode location in the |
4144 | * buffer-cache. This gobbles the caller's reference to the | 4238 | * buffer-cache. This gobbles the caller's reference to the |
@@ -4248,10 +4342,11 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4248 | cpu_to_le16(ei->i_extra_isize); | 4342 | cpu_to_le16(ei->i_extra_isize); |
4249 | } | 4343 | } |
4250 | } | 4344 | } |
4251 | |||
4252 | ext4_inode_csum_set(inode, raw_inode, ei); | 4345 | ext4_inode_csum_set(inode, raw_inode, ei); |
4253 | |||
4254 | spin_unlock(&ei->i_raw_lock); | 4346 | spin_unlock(&ei->i_raw_lock); |
4347 | if (inode->i_sb->s_flags & MS_LAZYTIME) | ||
4348 | ext4_update_other_inodes_time(inode->i_sb, inode->i_ino, | ||
4349 | bh->b_data); | ||
4255 | 4350 | ||
4256 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4351 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
4257 | rc = ext4_handle_dirty_metadata(handle, NULL, bh); | 4352 | rc = ext4_handle_dirty_metadata(handle, NULL, bh); |
@@ -4534,7 +4629,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4534 | * Truncate pagecache after we've waited for commit | 4629 | * Truncate pagecache after we've waited for commit |
4535 | * in data=journal mode to make pages freeable. | 4630 | * in data=journal mode to make pages freeable. |
4536 | */ | 4631 | */ |
4537 | truncate_pagecache(inode, inode->i_size); | 4632 | truncate_pagecache(inode, inode->i_size); |
4538 | } | 4633 | } |
4539 | /* | 4634 | /* |
4540 | * We want to call ext4_truncate() even if attr->ia_size == | 4635 | * We want to call ext4_truncate() even if attr->ia_size == |
@@ -4840,11 +4935,17 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
4840 | * If the inode is marked synchronous, we don't honour that here - doing | 4935 | * If the inode is marked synchronous, we don't honour that here - doing |
4841 | * so would cause a commit on atime updates, which we don't bother doing. | 4936 | * so would cause a commit on atime updates, which we don't bother doing. |
4842 | * We handle synchronous inodes at the highest possible level. | 4937 | * We handle synchronous inodes at the highest possible level. |
4938 | * | ||
4939 | * If only the I_DIRTY_TIME flag is set, we can skip everything. If | ||
4940 | * I_DIRTY_TIME and I_DIRTY_SYNC is set, the only inode fields we need | ||
4941 | * to copy into the on-disk inode structure are the timestamp files. | ||
4843 | */ | 4942 | */ |
4844 | void ext4_dirty_inode(struct inode *inode, int flags) | 4943 | void ext4_dirty_inode(struct inode *inode, int flags) |
4845 | { | 4944 | { |
4846 | handle_t *handle; | 4945 | handle_t *handle; |
4847 | 4946 | ||
4947 | if (flags == I_DIRTY_TIME) | ||
4948 | return; | ||
4848 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); | 4949 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); |
4849 | if (IS_ERR(handle)) | 4950 | if (IS_ERR(handle)) |
4850 | goto out; | 4951 | goto out; |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2291923dae4e..28fe71a2904c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -2235,7 +2235,10 @@ retry: | |||
2235 | err = PTR_ERR(inode); | 2235 | err = PTR_ERR(inode); |
2236 | if (!IS_ERR(inode)) { | 2236 | if (!IS_ERR(inode)) { |
2237 | inode->i_op = &ext4_file_inode_operations; | 2237 | inode->i_op = &ext4_file_inode_operations; |
2238 | inode->i_fop = &ext4_file_operations; | 2238 | if (test_opt(inode->i_sb, DAX)) |
2239 | inode->i_fop = &ext4_dax_file_operations; | ||
2240 | else | ||
2241 | inode->i_fop = &ext4_file_operations; | ||
2239 | ext4_set_aops(inode); | 2242 | ext4_set_aops(inode); |
2240 | err = ext4_add_nondir(handle, dentry, inode); | 2243 | err = ext4_add_nondir(handle, dentry, inode); |
2241 | if (!err && IS_DIRSYNC(dir)) | 2244 | if (!err && IS_DIRSYNC(dir)) |
@@ -2299,7 +2302,10 @@ retry: | |||
2299 | err = PTR_ERR(inode); | 2302 | err = PTR_ERR(inode); |
2300 | if (!IS_ERR(inode)) { | 2303 | if (!IS_ERR(inode)) { |
2301 | inode->i_op = &ext4_file_inode_operations; | 2304 | inode->i_op = &ext4_file_inode_operations; |
2302 | inode->i_fop = &ext4_file_operations; | 2305 | if (test_opt(inode->i_sb, DAX)) |
2306 | inode->i_fop = &ext4_dax_file_operations; | ||
2307 | else | ||
2308 | inode->i_fop = &ext4_file_operations; | ||
2303 | ext4_set_aops(inode); | 2309 | ext4_set_aops(inode); |
2304 | d_tmpfile(dentry, inode); | 2310 | d_tmpfile(dentry, inode); |
2305 | err = ext4_orphan_add(handle, inode); | 2311 | err = ext4_orphan_add(handle, inode); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 74c5f53595fb..1adac6868e6f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -334,7 +334,7 @@ static void save_error_info(struct super_block *sb, const char *func, | |||
334 | static int block_device_ejected(struct super_block *sb) | 334 | static int block_device_ejected(struct super_block *sb) |
335 | { | 335 | { |
336 | struct inode *bd_inode = sb->s_bdev->bd_inode; | 336 | struct inode *bd_inode = sb->s_bdev->bd_inode; |
337 | struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info; | 337 | struct backing_dev_info *bdi = inode_to_bdi(bd_inode); |
338 | 338 | ||
339 | return bdi->dev == NULL; | 339 | return bdi->dev == NULL; |
340 | } | 340 | } |
@@ -1046,10 +1046,7 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot); | |||
1046 | static int ext4_write_info(struct super_block *sb, int type); | 1046 | static int ext4_write_info(struct super_block *sb, int type); |
1047 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, | 1047 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, |
1048 | struct path *path); | 1048 | struct path *path); |
1049 | static int ext4_quota_on_sysfile(struct super_block *sb, int type, | ||
1050 | int format_id); | ||
1051 | static int ext4_quota_off(struct super_block *sb, int type); | 1049 | static int ext4_quota_off(struct super_block *sb, int type); |
1052 | static int ext4_quota_off_sysfile(struct super_block *sb, int type); | ||
1053 | static int ext4_quota_on_mount(struct super_block *sb, int type); | 1050 | static int ext4_quota_on_mount(struct super_block *sb, int type); |
1054 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | 1051 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, |
1055 | size_t len, loff_t off); | 1052 | size_t len, loff_t off); |
@@ -1084,16 +1081,6 @@ static const struct quotactl_ops ext4_qctl_operations = { | |||
1084 | .get_dqblk = dquot_get_dqblk, | 1081 | .get_dqblk = dquot_get_dqblk, |
1085 | .set_dqblk = dquot_set_dqblk | 1082 | .set_dqblk = dquot_set_dqblk |
1086 | }; | 1083 | }; |
1087 | |||
1088 | static const struct quotactl_ops ext4_qctl_sysfile_operations = { | ||
1089 | .quota_on_meta = ext4_quota_on_sysfile, | ||
1090 | .quota_off = ext4_quota_off_sysfile, | ||
1091 | .quota_sync = dquot_quota_sync, | ||
1092 | .get_info = dquot_get_dqinfo, | ||
1093 | .set_info = dquot_set_dqinfo, | ||
1094 | .get_dqblk = dquot_get_dqblk, | ||
1095 | .set_dqblk = dquot_set_dqblk | ||
1096 | }; | ||
1097 | #endif | 1084 | #endif |
1098 | 1085 | ||
1099 | static const struct super_operations ext4_sops = { | 1086 | static const struct super_operations ext4_sops = { |
@@ -1137,8 +1124,9 @@ enum { | |||
1137 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 1124 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
1138 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, | 1125 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, |
1139 | Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, | 1126 | Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, |
1140 | Opt_usrquota, Opt_grpquota, Opt_i_version, | 1127 | Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax, |
1141 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, | 1128 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, |
1129 | Opt_lazytime, Opt_nolazytime, | ||
1142 | Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, | 1130 | Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, |
1143 | Opt_inode_readahead_blks, Opt_journal_ioprio, | 1131 | Opt_inode_readahead_blks, Opt_journal_ioprio, |
1144 | Opt_dioread_nolock, Opt_dioread_lock, | 1132 | Opt_dioread_nolock, Opt_dioread_lock, |
@@ -1200,8 +1188,11 @@ static const match_table_t tokens = { | |||
1200 | {Opt_barrier, "barrier"}, | 1188 | {Opt_barrier, "barrier"}, |
1201 | {Opt_nobarrier, "nobarrier"}, | 1189 | {Opt_nobarrier, "nobarrier"}, |
1202 | {Opt_i_version, "i_version"}, | 1190 | {Opt_i_version, "i_version"}, |
1191 | {Opt_dax, "dax"}, | ||
1203 | {Opt_stripe, "stripe=%u"}, | 1192 | {Opt_stripe, "stripe=%u"}, |
1204 | {Opt_delalloc, "delalloc"}, | 1193 | {Opt_delalloc, "delalloc"}, |
1194 | {Opt_lazytime, "lazytime"}, | ||
1195 | {Opt_nolazytime, "nolazytime"}, | ||
1205 | {Opt_nodelalloc, "nodelalloc"}, | 1196 | {Opt_nodelalloc, "nodelalloc"}, |
1206 | {Opt_removed, "mblk_io_submit"}, | 1197 | {Opt_removed, "mblk_io_submit"}, |
1207 | {Opt_removed, "nomblk_io_submit"}, | 1198 | {Opt_removed, "nomblk_io_submit"}, |
@@ -1384,6 +1375,7 @@ static const struct mount_opts { | |||
1384 | {Opt_min_batch_time, 0, MOPT_GTE0}, | 1375 | {Opt_min_batch_time, 0, MOPT_GTE0}, |
1385 | {Opt_inode_readahead_blks, 0, MOPT_GTE0}, | 1376 | {Opt_inode_readahead_blks, 0, MOPT_GTE0}, |
1386 | {Opt_init_itable, 0, MOPT_GTE0}, | 1377 | {Opt_init_itable, 0, MOPT_GTE0}, |
1378 | {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET}, | ||
1387 | {Opt_stripe, 0, MOPT_GTE0}, | 1379 | {Opt_stripe, 0, MOPT_GTE0}, |
1388 | {Opt_resuid, 0, MOPT_GTE0}, | 1380 | {Opt_resuid, 0, MOPT_GTE0}, |
1389 | {Opt_resgid, 0, MOPT_GTE0}, | 1381 | {Opt_resgid, 0, MOPT_GTE0}, |
@@ -1459,6 +1451,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, | |||
1459 | case Opt_i_version: | 1451 | case Opt_i_version: |
1460 | sb->s_flags |= MS_I_VERSION; | 1452 | sb->s_flags |= MS_I_VERSION; |
1461 | return 1; | 1453 | return 1; |
1454 | case Opt_lazytime: | ||
1455 | sb->s_flags |= MS_LAZYTIME; | ||
1456 | return 1; | ||
1457 | case Opt_nolazytime: | ||
1458 | sb->s_flags &= ~MS_LAZYTIME; | ||
1459 | return 1; | ||
1462 | } | 1460 | } |
1463 | 1461 | ||
1464 | for (m = ext4_mount_opts; m->token != Opt_err; m++) | 1462 | for (m = ext4_mount_opts; m->token != Opt_err; m++) |
@@ -1620,6 +1618,11 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, | |||
1620 | } | 1618 | } |
1621 | sbi->s_jquota_fmt = m->mount_opt; | 1619 | sbi->s_jquota_fmt = m->mount_opt; |
1622 | #endif | 1620 | #endif |
1621 | #ifndef CONFIG_FS_DAX | ||
1622 | } else if (token == Opt_dax) { | ||
1623 | ext4_msg(sb, KERN_INFO, "dax option not supported"); | ||
1624 | return -1; | ||
1625 | #endif | ||
1623 | } else { | 1626 | } else { |
1624 | if (!args->from) | 1627 | if (!args->from) |
1625 | arg = 1; | 1628 | arg = 1; |
@@ -3602,6 +3605,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3602 | "both data=journal and dioread_nolock"); | 3605 | "both data=journal and dioread_nolock"); |
3603 | goto failed_mount; | 3606 | goto failed_mount; |
3604 | } | 3607 | } |
3608 | if (test_opt(sb, DAX)) { | ||
3609 | ext4_msg(sb, KERN_ERR, "can't mount with " | ||
3610 | "both data=journal and dax"); | ||
3611 | goto failed_mount; | ||
3612 | } | ||
3605 | if (test_opt(sb, DELALLOC)) | 3613 | if (test_opt(sb, DELALLOC)) |
3606 | clear_opt(sb, DELALLOC); | 3614 | clear_opt(sb, DELALLOC); |
3607 | } | 3615 | } |
@@ -3665,6 +3673,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3665 | goto failed_mount; | 3673 | goto failed_mount; |
3666 | } | 3674 | } |
3667 | 3675 | ||
3676 | if (sbi->s_mount_opt & EXT4_MOUNT_DAX) { | ||
3677 | if (blocksize != PAGE_SIZE) { | ||
3678 | ext4_msg(sb, KERN_ERR, | ||
3679 | "error: unsupported blocksize for dax"); | ||
3680 | goto failed_mount; | ||
3681 | } | ||
3682 | if (!sb->s_bdev->bd_disk->fops->direct_access) { | ||
3683 | ext4_msg(sb, KERN_ERR, | ||
3684 | "error: device does not support dax"); | ||
3685 | goto failed_mount; | ||
3686 | } | ||
3687 | } | ||
3688 | |||
3668 | if (sb->s_blocksize != blocksize) { | 3689 | if (sb->s_blocksize != blocksize) { |
3669 | /* Validate the filesystem blocksize */ | 3690 | /* Validate the filesystem blocksize */ |
3670 | if (!sb_set_blocksize(sb, blocksize)) { | 3691 | if (!sb_set_blocksize(sb, blocksize)) { |
@@ -3935,7 +3956,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3935 | #ifdef CONFIG_QUOTA | 3956 | #ifdef CONFIG_QUOTA |
3936 | sb->dq_op = &ext4_quota_operations; | 3957 | sb->dq_op = &ext4_quota_operations; |
3937 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) | 3958 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) |
3938 | sb->s_qcop = &ext4_qctl_sysfile_operations; | 3959 | sb->s_qcop = &dquot_quotactl_sysfile_ops; |
3939 | else | 3960 | else |
3940 | sb->s_qcop = &ext4_qctl_operations; | 3961 | sb->s_qcop = &ext4_qctl_operations; |
3941 | sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; | 3962 | sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; |
@@ -4882,6 +4903,18 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4882 | err = -EINVAL; | 4903 | err = -EINVAL; |
4883 | goto restore_opts; | 4904 | goto restore_opts; |
4884 | } | 4905 | } |
4906 | if (test_opt(sb, DAX)) { | ||
4907 | ext4_msg(sb, KERN_ERR, "can't mount with " | ||
4908 | "both data=journal and dax"); | ||
4909 | err = -EINVAL; | ||
4910 | goto restore_opts; | ||
4911 | } | ||
4912 | } | ||
4913 | |||
4914 | if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) { | ||
4915 | ext4_msg(sb, KERN_WARNING, "warning: refusing change of " | ||
4916 | "dax flag with busy inodes while remounting"); | ||
4917 | sbi->s_mount_opt ^= EXT4_MOUNT_DAX; | ||
4885 | } | 4918 | } |
4886 | 4919 | ||
4887 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) | 4920 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) |
@@ -5020,6 +5053,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
5020 | } | 5053 | } |
5021 | #endif | 5054 | #endif |
5022 | 5055 | ||
5056 | *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME); | ||
5023 | ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); | 5057 | ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); |
5024 | kfree(orig_data); | 5058 | kfree(orig_data); |
5025 | return 0; | 5059 | return 0; |
@@ -5288,21 +5322,6 @@ static int ext4_enable_quotas(struct super_block *sb) | |||
5288 | return 0; | 5322 | return 0; |
5289 | } | 5323 | } |
5290 | 5324 | ||
5291 | /* | ||
5292 | * quota_on function that is used when QUOTA feature is set. | ||
5293 | */ | ||
5294 | static int ext4_quota_on_sysfile(struct super_block *sb, int type, | ||
5295 | int format_id) | ||
5296 | { | ||
5297 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) | ||
5298 | return -EINVAL; | ||
5299 | |||
5300 | /* | ||
5301 | * USAGE was enabled at mount time. Only need to enable LIMITS now. | ||
5302 | */ | ||
5303 | return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED); | ||
5304 | } | ||
5305 | |||
5306 | static int ext4_quota_off(struct super_block *sb, int type) | 5325 | static int ext4_quota_off(struct super_block *sb, int type) |
5307 | { | 5326 | { |
5308 | struct inode *inode = sb_dqopt(sb)->files[type]; | 5327 | struct inode *inode = sb_dqopt(sb)->files[type]; |
@@ -5329,18 +5348,6 @@ out: | |||
5329 | return dquot_quota_off(sb, type); | 5348 | return dquot_quota_off(sb, type); |
5330 | } | 5349 | } |
5331 | 5350 | ||
5332 | /* | ||
5333 | * quota_off function that is used when QUOTA feature is set. | ||
5334 | */ | ||
5335 | static int ext4_quota_off_sysfile(struct super_block *sb, int type) | ||
5336 | { | ||
5337 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) | ||
5338 | return -EINVAL; | ||
5339 | |||
5340 | /* Disable only the limits. */ | ||
5341 | return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); | ||
5342 | } | ||
5343 | |||
5344 | /* Read data from quotafile - avoid pagecache and such because we cannot afford | 5351 | /* Read data from quotafile - avoid pagecache and such because we cannot afford |
5345 | * acquiring the locks... As quota files are never truncated and quota code | 5352 | * acquiring the locks... As quota files are never truncated and quota code |
5346 | * itself serializes the operations (and no one else should touch the files) | 5353 | * itself serializes the operations (and no one else should touch the files) |
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 736a348509f7..94e2d2ffabe1 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig | |||
@@ -71,3 +71,13 @@ config F2FS_CHECK_FS | |||
71 | Enables BUG_ONs which check the filesystem consistency in runtime. | 71 | Enables BUG_ONs which check the filesystem consistency in runtime. |
72 | 72 | ||
73 | If you want to improve the performance, say N. | 73 | If you want to improve the performance, say N. |
74 | |||
75 | config F2FS_IO_TRACE | ||
76 | bool "F2FS IO tracer" | ||
77 | depends on F2FS_FS | ||
78 | depends on FUNCTION_TRACER | ||
79 | help | ||
80 | F2FS IO trace is based on a function trace, which gathers process | ||
81 | information and block IO patterns in the filesystem level. | ||
82 | |||
83 | If unsure, say N. | ||
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 2e35da12d292..d92397731db8 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile | |||
@@ -5,3 +5,4 @@ f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o | |||
5 | f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o | 5 | f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o |
6 | f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o | 6 | f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o |
7 | f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o | 7 | f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o |
8 | f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o | ||
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 1ccb26bc2a0b..742202779bd5 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c | |||
@@ -62,7 +62,7 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) | |||
62 | if (count == 0) | 62 | if (count == 0) |
63 | return NULL; | 63 | return NULL; |
64 | 64 | ||
65 | acl = posix_acl_alloc(count, GFP_KERNEL); | 65 | acl = posix_acl_alloc(count, GFP_NOFS); |
66 | if (!acl) | 66 | if (!acl) |
67 | return ERR_PTR(-ENOMEM); | 67 | return ERR_PTR(-ENOMEM); |
68 | 68 | ||
@@ -116,7 +116,7 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) | |||
116 | int i; | 116 | int i; |
117 | 117 | ||
118 | f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * | 118 | f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * |
119 | sizeof(struct f2fs_acl_entry), GFP_KERNEL); | 119 | sizeof(struct f2fs_acl_entry), GFP_NOFS); |
120 | if (!f2fs_acl) | 120 | if (!f2fs_acl) |
121 | return ERR_PTR(-ENOMEM); | 121 | return ERR_PTR(-ENOMEM); |
122 | 122 | ||
@@ -396,7 +396,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, | |||
396 | posix_acl_release(default_acl); | 396 | posix_acl_release(default_acl); |
397 | } | 397 | } |
398 | if (acl) { | 398 | if (acl) { |
399 | if (error) | 399 | if (!error) |
400 | error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, | 400 | error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, |
401 | ipage); | 401 | ipage); |
402 | posix_acl_release(acl); | 402 | posix_acl_release(acl); |
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index e6c271fefaca..7f794b72b3b7 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c | |||
@@ -20,10 +20,11 @@ | |||
20 | #include "f2fs.h" | 20 | #include "f2fs.h" |
21 | #include "node.h" | 21 | #include "node.h" |
22 | #include "segment.h" | 22 | #include "segment.h" |
23 | #include "trace.h" | ||
23 | #include <trace/events/f2fs.h> | 24 | #include <trace/events/f2fs.h> |
24 | 25 | ||
25 | static struct kmem_cache *ino_entry_slab; | 26 | static struct kmem_cache *ino_entry_slab; |
26 | static struct kmem_cache *inode_entry_slab; | 27 | struct kmem_cache *inode_entry_slab; |
27 | 28 | ||
28 | /* | 29 | /* |
29 | * We guarantee no failure on the returned page. | 30 | * We guarantee no failure on the returned page. |
@@ -50,6 +51,11 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) | |||
50 | { | 51 | { |
51 | struct address_space *mapping = META_MAPPING(sbi); | 52 | struct address_space *mapping = META_MAPPING(sbi); |
52 | struct page *page; | 53 | struct page *page; |
54 | struct f2fs_io_info fio = { | ||
55 | .type = META, | ||
56 | .rw = READ_SYNC | REQ_META | REQ_PRIO, | ||
57 | .blk_addr = index, | ||
58 | }; | ||
53 | repeat: | 59 | repeat: |
54 | page = grab_cache_page(mapping, index); | 60 | page = grab_cache_page(mapping, index); |
55 | if (!page) { | 61 | if (!page) { |
@@ -59,8 +65,7 @@ repeat: | |||
59 | if (PageUptodate(page)) | 65 | if (PageUptodate(page)) |
60 | goto out; | 66 | goto out; |
61 | 67 | ||
62 | if (f2fs_submit_page_bio(sbi, page, index, | 68 | if (f2fs_submit_page_bio(sbi, page, &fio)) |
63 | READ_SYNC | REQ_META | REQ_PRIO)) | ||
64 | goto repeat; | 69 | goto repeat; |
65 | 70 | ||
66 | lock_page(page); | 71 | lock_page(page); |
@@ -112,14 +117,12 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type | |||
112 | block_t prev_blk_addr = 0; | 117 | block_t prev_blk_addr = 0; |
113 | struct page *page; | 118 | struct page *page; |
114 | block_t blkno = start; | 119 | block_t blkno = start; |
115 | |||
116 | struct f2fs_io_info fio = { | 120 | struct f2fs_io_info fio = { |
117 | .type = META, | 121 | .type = META, |
118 | .rw = READ_SYNC | REQ_META | REQ_PRIO | 122 | .rw = READ_SYNC | REQ_META | REQ_PRIO |
119 | }; | 123 | }; |
120 | 124 | ||
121 | for (; nrpages-- > 0; blkno++) { | 125 | for (; nrpages-- > 0; blkno++) { |
122 | block_t blk_addr; | ||
123 | 126 | ||
124 | if (!is_valid_blkaddr(sbi, blkno, type)) | 127 | if (!is_valid_blkaddr(sbi, blkno, type)) |
125 | goto out; | 128 | goto out; |
@@ -130,27 +133,27 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type | |||
130 | NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid))) | 133 | NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid))) |
131 | blkno = 0; | 134 | blkno = 0; |
132 | /* get nat block addr */ | 135 | /* get nat block addr */ |
133 | blk_addr = current_nat_addr(sbi, | 136 | fio.blk_addr = current_nat_addr(sbi, |
134 | blkno * NAT_ENTRY_PER_BLOCK); | 137 | blkno * NAT_ENTRY_PER_BLOCK); |
135 | break; | 138 | break; |
136 | case META_SIT: | 139 | case META_SIT: |
137 | /* get sit block addr */ | 140 | /* get sit block addr */ |
138 | blk_addr = current_sit_addr(sbi, | 141 | fio.blk_addr = current_sit_addr(sbi, |
139 | blkno * SIT_ENTRY_PER_BLOCK); | 142 | blkno * SIT_ENTRY_PER_BLOCK); |
140 | if (blkno != start && prev_blk_addr + 1 != blk_addr) | 143 | if (blkno != start && prev_blk_addr + 1 != fio.blk_addr) |
141 | goto out; | 144 | goto out; |
142 | prev_blk_addr = blk_addr; | 145 | prev_blk_addr = fio.blk_addr; |
143 | break; | 146 | break; |
144 | case META_SSA: | 147 | case META_SSA: |
145 | case META_CP: | 148 | case META_CP: |
146 | case META_POR: | 149 | case META_POR: |
147 | blk_addr = blkno; | 150 | fio.blk_addr = blkno; |
148 | break; | 151 | break; |
149 | default: | 152 | default: |
150 | BUG(); | 153 | BUG(); |
151 | } | 154 | } |
152 | 155 | ||
153 | page = grab_cache_page(META_MAPPING(sbi), blk_addr); | 156 | page = grab_cache_page(META_MAPPING(sbi), fio.blk_addr); |
154 | if (!page) | 157 | if (!page) |
155 | continue; | 158 | continue; |
156 | if (PageUptodate(page)) { | 159 | if (PageUptodate(page)) { |
@@ -158,7 +161,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type | |||
158 | continue; | 161 | continue; |
159 | } | 162 | } |
160 | 163 | ||
161 | f2fs_submit_page_mbio(sbi, page, blk_addr, &fio); | 164 | f2fs_submit_page_mbio(sbi, page, &fio); |
162 | f2fs_put_page(page, 0); | 165 | f2fs_put_page(page, 0); |
163 | } | 166 | } |
164 | out: | 167 | out: |
@@ -187,7 +190,7 @@ static int f2fs_write_meta_page(struct page *page, | |||
187 | 190 | ||
188 | trace_f2fs_writepage(page, META); | 191 | trace_f2fs_writepage(page, META); |
189 | 192 | ||
190 | if (unlikely(sbi->por_doing)) | 193 | if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) |
191 | goto redirty_out; | 194 | goto redirty_out; |
192 | if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) | 195 | if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) |
193 | goto redirty_out; | 196 | goto redirty_out; |
@@ -299,6 +302,8 @@ static int f2fs_set_meta_page_dirty(struct page *page) | |||
299 | if (!PageDirty(page)) { | 302 | if (!PageDirty(page)) { |
300 | __set_page_dirty_nobuffers(page); | 303 | __set_page_dirty_nobuffers(page); |
301 | inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); | 304 | inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); |
305 | SetPagePrivate(page); | ||
306 | f2fs_trace_pid(page); | ||
302 | return 1; | 307 | return 1; |
303 | } | 308 | } |
304 | return 0; | 309 | return 0; |
@@ -308,6 +313,8 @@ const struct address_space_operations f2fs_meta_aops = { | |||
308 | .writepage = f2fs_write_meta_page, | 313 | .writepage = f2fs_write_meta_page, |
309 | .writepages = f2fs_write_meta_pages, | 314 | .writepages = f2fs_write_meta_pages, |
310 | .set_page_dirty = f2fs_set_meta_page_dirty, | 315 | .set_page_dirty = f2fs_set_meta_page_dirty, |
316 | .invalidatepage = f2fs_invalidate_page, | ||
317 | .releasepage = f2fs_release_page, | ||
311 | }; | 318 | }; |
312 | 319 | ||
313 | static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) | 320 | static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) |
@@ -462,7 +469,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) | |||
462 | if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) | 469 | if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) |
463 | return; | 470 | return; |
464 | 471 | ||
465 | sbi->por_doing = true; | 472 | set_sbi_flag(sbi, SBI_POR_DOING); |
466 | 473 | ||
467 | start_blk = __start_cp_addr(sbi) + 1 + | 474 | start_blk = __start_cp_addr(sbi) + 1 + |
468 | le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); | 475 | le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); |
@@ -483,7 +490,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) | |||
483 | } | 490 | } |
484 | /* clear Orphan Flag */ | 491 | /* clear Orphan Flag */ |
485 | clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); | 492 | clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); |
486 | sbi->por_doing = false; | 493 | clear_sbi_flag(sbi, SBI_POR_DOING); |
487 | return; | 494 | return; |
488 | } | 495 | } |
489 | 496 | ||
@@ -567,7 +574,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, | |||
567 | if (crc_offset >= blk_size) | 574 | if (crc_offset >= blk_size) |
568 | goto invalid_cp1; | 575 | goto invalid_cp1; |
569 | 576 | ||
570 | crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); | 577 | crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset))); |
571 | if (!f2fs_crc_valid(crc, cp_block, crc_offset)) | 578 | if (!f2fs_crc_valid(crc, cp_block, crc_offset)) |
572 | goto invalid_cp1; | 579 | goto invalid_cp1; |
573 | 580 | ||
@@ -582,7 +589,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, | |||
582 | if (crc_offset >= blk_size) | 589 | if (crc_offset >= blk_size) |
583 | goto invalid_cp2; | 590 | goto invalid_cp2; |
584 | 591 | ||
585 | crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); | 592 | crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset))); |
586 | if (!f2fs_crc_valid(crc, cp_block, crc_offset)) | 593 | if (!f2fs_crc_valid(crc, cp_block, crc_offset)) |
587 | goto invalid_cp2; | 594 | goto invalid_cp2; |
588 | 595 | ||
@@ -669,7 +676,7 @@ fail_no_cp: | |||
669 | return -EINVAL; | 676 | return -EINVAL; |
670 | } | 677 | } |
671 | 678 | ||
672 | static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) | 679 | static int __add_dirty_inode(struct inode *inode, struct inode_entry *new) |
673 | { | 680 | { |
674 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | 681 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
675 | 682 | ||
@@ -686,7 +693,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) | |||
686 | void update_dirty_page(struct inode *inode, struct page *page) | 693 | void update_dirty_page(struct inode *inode, struct page *page) |
687 | { | 694 | { |
688 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | 695 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
689 | struct dir_inode_entry *new; | 696 | struct inode_entry *new; |
690 | int ret = 0; | 697 | int ret = 0; |
691 | 698 | ||
692 | if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) | 699 | if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) |
@@ -710,12 +717,13 @@ void update_dirty_page(struct inode *inode, struct page *page) | |||
710 | kmem_cache_free(inode_entry_slab, new); | 717 | kmem_cache_free(inode_entry_slab, new); |
711 | out: | 718 | out: |
712 | SetPagePrivate(page); | 719 | SetPagePrivate(page); |
720 | f2fs_trace_pid(page); | ||
713 | } | 721 | } |
714 | 722 | ||
715 | void add_dirty_dir_inode(struct inode *inode) | 723 | void add_dirty_dir_inode(struct inode *inode) |
716 | { | 724 | { |
717 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | 725 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
718 | struct dir_inode_entry *new = | 726 | struct inode_entry *new = |
719 | f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); | 727 | f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); |
720 | int ret = 0; | 728 | int ret = 0; |
721 | 729 | ||
@@ -733,7 +741,7 @@ void add_dirty_dir_inode(struct inode *inode) | |||
733 | void remove_dirty_dir_inode(struct inode *inode) | 741 | void remove_dirty_dir_inode(struct inode *inode) |
734 | { | 742 | { |
735 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | 743 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
736 | struct dir_inode_entry *entry; | 744 | struct inode_entry *entry; |
737 | 745 | ||
738 | if (!S_ISDIR(inode->i_mode)) | 746 | if (!S_ISDIR(inode->i_mode)) |
739 | return; | 747 | return; |
@@ -763,7 +771,7 @@ void remove_dirty_dir_inode(struct inode *inode) | |||
763 | void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) | 771 | void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) |
764 | { | 772 | { |
765 | struct list_head *head; | 773 | struct list_head *head; |
766 | struct dir_inode_entry *entry; | 774 | struct inode_entry *entry; |
767 | struct inode *inode; | 775 | struct inode *inode; |
768 | retry: | 776 | retry: |
769 | if (unlikely(f2fs_cp_error(sbi))) | 777 | if (unlikely(f2fs_cp_error(sbi))) |
@@ -776,7 +784,7 @@ retry: | |||
776 | spin_unlock(&sbi->dir_inode_lock); | 784 | spin_unlock(&sbi->dir_inode_lock); |
777 | return; | 785 | return; |
778 | } | 786 | } |
779 | entry = list_entry(head->next, struct dir_inode_entry, list); | 787 | entry = list_entry(head->next, struct inode_entry, list); |
780 | inode = igrab(entry->inode); | 788 | inode = igrab(entry->inode); |
781 | spin_unlock(&sbi->dir_inode_lock); | 789 | spin_unlock(&sbi->dir_inode_lock); |
782 | if (inode) { | 790 | if (inode) { |
@@ -922,7 +930,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
922 | ckpt->next_free_nid = cpu_to_le32(last_nid); | 930 | ckpt->next_free_nid = cpu_to_le32(last_nid); |
923 | 931 | ||
924 | /* 2 cp + n data seg summary + orphan inode blocks */ | 932 | /* 2 cp + n data seg summary + orphan inode blocks */ |
925 | data_sum_blocks = npages_for_summary_flush(sbi); | 933 | data_sum_blocks = npages_for_summary_flush(sbi, false); |
926 | if (data_sum_blocks < NR_CURSEG_DATA_TYPE) | 934 | if (data_sum_blocks < NR_CURSEG_DATA_TYPE) |
927 | set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); | 935 | set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); |
928 | else | 936 | else |
@@ -932,24 +940,31 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
932 | ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + | 940 | ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + |
933 | orphan_blocks); | 941 | orphan_blocks); |
934 | 942 | ||
935 | if (cpc->reason == CP_UMOUNT) { | 943 | if (__remain_node_summaries(cpc->reason)) |
936 | set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); | ||
937 | ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ | 944 | ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ |
938 | cp_payload_blks + data_sum_blocks + | 945 | cp_payload_blks + data_sum_blocks + |
939 | orphan_blocks + NR_CURSEG_NODE_TYPE); | 946 | orphan_blocks + NR_CURSEG_NODE_TYPE); |
940 | } else { | 947 | else |
941 | clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); | ||
942 | ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + | 948 | ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + |
943 | cp_payload_blks + data_sum_blocks + | 949 | cp_payload_blks + data_sum_blocks + |
944 | orphan_blocks); | 950 | orphan_blocks); |
945 | } | 951 | |
952 | if (cpc->reason == CP_UMOUNT) | ||
953 | set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); | ||
954 | else | ||
955 | clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); | ||
956 | |||
957 | if (cpc->reason == CP_FASTBOOT) | ||
958 | set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); | ||
959 | else | ||
960 | clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); | ||
946 | 961 | ||
947 | if (orphan_num) | 962 | if (orphan_num) |
948 | set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); | 963 | set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); |
949 | else | 964 | else |
950 | clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); | 965 | clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); |
951 | 966 | ||
952 | if (sbi->need_fsck) | 967 | if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) |
953 | set_ckpt_flags(ckpt, CP_FSCK_FLAG); | 968 | set_ckpt_flags(ckpt, CP_FSCK_FLAG); |
954 | 969 | ||
955 | /* update SIT/NAT bitmap */ | 970 | /* update SIT/NAT bitmap */ |
@@ -966,15 +981,14 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
966 | /* write out checkpoint buffer at block 0 */ | 981 | /* write out checkpoint buffer at block 0 */ |
967 | cp_page = grab_meta_page(sbi, start_blk++); | 982 | cp_page = grab_meta_page(sbi, start_blk++); |
968 | kaddr = page_address(cp_page); | 983 | kaddr = page_address(cp_page); |
969 | memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); | 984 | memcpy(kaddr, ckpt, F2FS_BLKSIZE); |
970 | set_page_dirty(cp_page); | 985 | set_page_dirty(cp_page); |
971 | f2fs_put_page(cp_page, 1); | 986 | f2fs_put_page(cp_page, 1); |
972 | 987 | ||
973 | for (i = 1; i < 1 + cp_payload_blks; i++) { | 988 | for (i = 1; i < 1 + cp_payload_blks; i++) { |
974 | cp_page = grab_meta_page(sbi, start_blk++); | 989 | cp_page = grab_meta_page(sbi, start_blk++); |
975 | kaddr = page_address(cp_page); | 990 | kaddr = page_address(cp_page); |
976 | memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, | 991 | memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, F2FS_BLKSIZE); |
977 | (1 << sbi->log_blocksize)); | ||
978 | set_page_dirty(cp_page); | 992 | set_page_dirty(cp_page); |
979 | f2fs_put_page(cp_page, 1); | 993 | f2fs_put_page(cp_page, 1); |
980 | } | 994 | } |
@@ -986,7 +1000,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
986 | 1000 | ||
987 | write_data_summaries(sbi, start_blk); | 1001 | write_data_summaries(sbi, start_blk); |
988 | start_blk += data_sum_blocks; | 1002 | start_blk += data_sum_blocks; |
989 | if (cpc->reason == CP_UMOUNT) { | 1003 | if (__remain_node_summaries(cpc->reason)) { |
990 | write_node_summaries(sbi, start_blk); | 1004 | write_node_summaries(sbi, start_blk); |
991 | start_blk += NR_CURSEG_NODE_TYPE; | 1005 | start_blk += NR_CURSEG_NODE_TYPE; |
992 | } | 1006 | } |
@@ -994,7 +1008,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
994 | /* writeout checkpoint block */ | 1008 | /* writeout checkpoint block */ |
995 | cp_page = grab_meta_page(sbi, start_blk); | 1009 | cp_page = grab_meta_page(sbi, start_blk); |
996 | kaddr = page_address(cp_page); | 1010 | kaddr = page_address(cp_page); |
997 | memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); | 1011 | memcpy(kaddr, ckpt, F2FS_BLKSIZE); |
998 | set_page_dirty(cp_page); | 1012 | set_page_dirty(cp_page); |
999 | f2fs_put_page(cp_page, 1); | 1013 | f2fs_put_page(cp_page, 1); |
1000 | 1014 | ||
@@ -1023,7 +1037,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
1023 | return; | 1037 | return; |
1024 | 1038 | ||
1025 | clear_prefree_segments(sbi); | 1039 | clear_prefree_segments(sbi); |
1026 | F2FS_RESET_SB_DIRT(sbi); | 1040 | clear_sbi_flag(sbi, SBI_IS_DIRTY); |
1027 | } | 1041 | } |
1028 | 1042 | ||
1029 | /* | 1043 | /* |
@@ -1038,10 +1052,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
1038 | 1052 | ||
1039 | mutex_lock(&sbi->cp_mutex); | 1053 | mutex_lock(&sbi->cp_mutex); |
1040 | 1054 | ||
1041 | if (!sbi->s_dirty && cpc->reason != CP_DISCARD) | 1055 | if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && |
1056 | cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT) | ||
1042 | goto out; | 1057 | goto out; |
1043 | if (unlikely(f2fs_cp_error(sbi))) | 1058 | if (unlikely(f2fs_cp_error(sbi))) |
1044 | goto out; | 1059 | goto out; |
1060 | if (f2fs_readonly(sbi->sb)) | ||
1061 | goto out; | ||
1045 | if (block_operations(sbi)) | 1062 | if (block_operations(sbi)) |
1046 | goto out; | 1063 | goto out; |
1047 | 1064 | ||
@@ -1102,8 +1119,8 @@ int __init create_checkpoint_caches(void) | |||
1102 | sizeof(struct ino_entry)); | 1119 | sizeof(struct ino_entry)); |
1103 | if (!ino_entry_slab) | 1120 | if (!ino_entry_slab) |
1104 | return -ENOMEM; | 1121 | return -ENOMEM; |
1105 | inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", | 1122 | inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry", |
1106 | sizeof(struct dir_inode_entry)); | 1123 | sizeof(struct inode_entry)); |
1107 | if (!inode_entry_slab) { | 1124 | if (!inode_entry_slab) { |
1108 | kmem_cache_destroy(ino_entry_slab); | 1125 | kmem_cache_destroy(ino_entry_slab); |
1109 | return -ENOMEM; | 1126 | return -ENOMEM; |
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7ec697b37f19..985ed023a750 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "f2fs.h" | 22 | #include "f2fs.h" |
23 | #include "node.h" | 23 | #include "node.h" |
24 | #include "segment.h" | 24 | #include "segment.h" |
25 | #include "trace.h" | ||
25 | #include <trace/events/f2fs.h> | 26 | #include <trace/events/f2fs.h> |
26 | 27 | ||
27 | static void f2fs_read_end_io(struct bio *bio, int err) | 28 | static void f2fs_read_end_io(struct bio *bio, int err) |
@@ -95,11 +96,9 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) | |||
95 | return; | 96 | return; |
96 | 97 | ||
97 | if (is_read_io(fio->rw)) | 98 | if (is_read_io(fio->rw)) |
98 | trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw, | 99 | trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio); |
99 | fio->type, io->bio); | ||
100 | else | 100 | else |
101 | trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw, | 101 | trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); |
102 | fio->type, io->bio); | ||
103 | 102 | ||
104 | submit_bio(fio->rw, io->bio); | 103 | submit_bio(fio->rw, io->bio); |
105 | io->bio = NULL; | 104 | io->bio = NULL; |
@@ -132,14 +131,15 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, | |||
132 | * Return unlocked page. | 131 | * Return unlocked page. |
133 | */ | 132 | */ |
134 | int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, | 133 | int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, |
135 | block_t blk_addr, int rw) | 134 | struct f2fs_io_info *fio) |
136 | { | 135 | { |
137 | struct bio *bio; | 136 | struct bio *bio; |
138 | 137 | ||
139 | trace_f2fs_submit_page_bio(page, blk_addr, rw); | 138 | trace_f2fs_submit_page_bio(page, fio); |
139 | f2fs_trace_ios(page, fio, 0); | ||
140 | 140 | ||
141 | /* Allocate a new bio */ | 141 | /* Allocate a new bio */ |
142 | bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw)); | 142 | bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw)); |
143 | 143 | ||
144 | if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { | 144 | if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { |
145 | bio_put(bio); | 145 | bio_put(bio); |
@@ -147,12 +147,12 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, | |||
147 | return -EFAULT; | 147 | return -EFAULT; |
148 | } | 148 | } |
149 | 149 | ||
150 | submit_bio(rw, bio); | 150 | submit_bio(fio->rw, bio); |
151 | return 0; | 151 | return 0; |
152 | } | 152 | } |
153 | 153 | ||
154 | void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, | 154 | void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, |
155 | block_t blk_addr, struct f2fs_io_info *fio) | 155 | struct f2fs_io_info *fio) |
156 | { | 156 | { |
157 | enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); | 157 | enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); |
158 | struct f2fs_bio_info *io; | 158 | struct f2fs_bio_info *io; |
@@ -160,21 +160,21 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, | |||
160 | 160 | ||
161 | io = is_read ? &sbi->read_io : &sbi->write_io[btype]; | 161 | io = is_read ? &sbi->read_io : &sbi->write_io[btype]; |
162 | 162 | ||
163 | verify_block_addr(sbi, blk_addr); | 163 | verify_block_addr(sbi, fio->blk_addr); |
164 | 164 | ||
165 | down_write(&io->io_rwsem); | 165 | down_write(&io->io_rwsem); |
166 | 166 | ||
167 | if (!is_read) | 167 | if (!is_read) |
168 | inc_page_count(sbi, F2FS_WRITEBACK); | 168 | inc_page_count(sbi, F2FS_WRITEBACK); |
169 | 169 | ||
170 | if (io->bio && (io->last_block_in_bio != blk_addr - 1 || | 170 | if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 || |
171 | io->fio.rw != fio->rw)) | 171 | io->fio.rw != fio->rw)) |
172 | __submit_merged_bio(io); | 172 | __submit_merged_bio(io); |
173 | alloc_new: | 173 | alloc_new: |
174 | if (io->bio == NULL) { | 174 | if (io->bio == NULL) { |
175 | int bio_blocks = MAX_BIO_BLOCKS(sbi); | 175 | int bio_blocks = MAX_BIO_BLOCKS(sbi); |
176 | 176 | ||
177 | io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); | 177 | io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read); |
178 | io->fio = *fio; | 178 | io->fio = *fio; |
179 | } | 179 | } |
180 | 180 | ||
@@ -184,10 +184,11 @@ alloc_new: | |||
184 | goto alloc_new; | 184 | goto alloc_new; |
185 | } | 185 | } |
186 | 186 | ||
187 | io->last_block_in_bio = blk_addr; | 187 | io->last_block_in_bio = fio->blk_addr; |
188 | f2fs_trace_ios(page, fio, 0); | ||
188 | 189 | ||
189 | up_write(&io->io_rwsem); | 190 | up_write(&io->io_rwsem); |
190 | trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); | 191 | trace_f2fs_submit_page_mbio(page, fio); |
191 | } | 192 | } |
192 | 193 | ||
193 | /* | 194 | /* |
@@ -196,7 +197,7 @@ alloc_new: | |||
196 | * ->node_page | 197 | * ->node_page |
197 | * update block addresses in the node page | 198 | * update block addresses in the node page |
198 | */ | 199 | */ |
199 | static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) | 200 | static void __set_data_blkaddr(struct dnode_of_data *dn) |
200 | { | 201 | { |
201 | struct f2fs_node *rn; | 202 | struct f2fs_node *rn; |
202 | __le32 *addr_array; | 203 | __le32 *addr_array; |
@@ -209,7 +210,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) | |||
209 | 210 | ||
210 | /* Get physical address of data block */ | 211 | /* Get physical address of data block */ |
211 | addr_array = blkaddr_in_node(rn); | 212 | addr_array = blkaddr_in_node(rn); |
212 | addr_array[ofs_in_node] = cpu_to_le32(new_addr); | 213 | addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); |
213 | set_page_dirty(node_page); | 214 | set_page_dirty(node_page); |
214 | } | 215 | } |
215 | 216 | ||
@@ -224,8 +225,8 @@ int reserve_new_block(struct dnode_of_data *dn) | |||
224 | 225 | ||
225 | trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); | 226 | trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); |
226 | 227 | ||
227 | __set_data_blkaddr(dn, NEW_ADDR); | ||
228 | dn->data_blkaddr = NEW_ADDR; | 228 | dn->data_blkaddr = NEW_ADDR; |
229 | __set_data_blkaddr(dn); | ||
229 | mark_inode_dirty(dn->inode); | 230 | mark_inode_dirty(dn->inode); |
230 | sync_inode_page(dn); | 231 | sync_inode_page(dn); |
231 | return 0; | 232 | return 0; |
@@ -273,7 +274,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, | |||
273 | unsigned int blkbits = inode->i_sb->s_blocksize_bits; | 274 | unsigned int blkbits = inode->i_sb->s_blocksize_bits; |
274 | size_t count; | 275 | size_t count; |
275 | 276 | ||
276 | clear_buffer_new(bh_result); | 277 | set_buffer_new(bh_result); |
277 | map_bh(bh_result, inode->i_sb, | 278 | map_bh(bh_result, inode->i_sb, |
278 | start_blkaddr + pgofs - start_fofs); | 279 | start_blkaddr + pgofs - start_fofs); |
279 | count = end_fofs - pgofs + 1; | 280 | count = end_fofs - pgofs + 1; |
@@ -290,23 +291,24 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, | |||
290 | return 0; | 291 | return 0; |
291 | } | 292 | } |
292 | 293 | ||
293 | void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) | 294 | void update_extent_cache(struct dnode_of_data *dn) |
294 | { | 295 | { |
295 | struct f2fs_inode_info *fi = F2FS_I(dn->inode); | 296 | struct f2fs_inode_info *fi = F2FS_I(dn->inode); |
296 | pgoff_t fofs, start_fofs, end_fofs; | 297 | pgoff_t fofs, start_fofs, end_fofs; |
297 | block_t start_blkaddr, end_blkaddr; | 298 | block_t start_blkaddr, end_blkaddr; |
298 | int need_update = true; | 299 | int need_update = true; |
299 | 300 | ||
300 | f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR); | 301 | f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); |
301 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + | ||
302 | dn->ofs_in_node; | ||
303 | 302 | ||
304 | /* Update the page address in the parent node */ | 303 | /* Update the page address in the parent node */ |
305 | __set_data_blkaddr(dn, blk_addr); | 304 | __set_data_blkaddr(dn); |
306 | 305 | ||
307 | if (is_inode_flag_set(fi, FI_NO_EXTENT)) | 306 | if (is_inode_flag_set(fi, FI_NO_EXTENT)) |
308 | return; | 307 | return; |
309 | 308 | ||
309 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + | ||
310 | dn->ofs_in_node; | ||
311 | |||
310 | write_lock(&fi->ext.ext_lock); | 312 | write_lock(&fi->ext.ext_lock); |
311 | 313 | ||
312 | start_fofs = fi->ext.fofs; | 314 | start_fofs = fi->ext.fofs; |
@@ -320,16 +322,16 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) | |||
320 | 322 | ||
321 | /* Initial extent */ | 323 | /* Initial extent */ |
322 | if (fi->ext.len == 0) { | 324 | if (fi->ext.len == 0) { |
323 | if (blk_addr != NULL_ADDR) { | 325 | if (dn->data_blkaddr != NULL_ADDR) { |
324 | fi->ext.fofs = fofs; | 326 | fi->ext.fofs = fofs; |
325 | fi->ext.blk_addr = blk_addr; | 327 | fi->ext.blk_addr = dn->data_blkaddr; |
326 | fi->ext.len = 1; | 328 | fi->ext.len = 1; |
327 | } | 329 | } |
328 | goto end_update; | 330 | goto end_update; |
329 | } | 331 | } |
330 | 332 | ||
331 | /* Front merge */ | 333 | /* Front merge */ |
332 | if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { | 334 | if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) { |
333 | fi->ext.fofs--; | 335 | fi->ext.fofs--; |
334 | fi->ext.blk_addr--; | 336 | fi->ext.blk_addr--; |
335 | fi->ext.len++; | 337 | fi->ext.len++; |
@@ -337,7 +339,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) | |||
337 | } | 339 | } |
338 | 340 | ||
339 | /* Back merge */ | 341 | /* Back merge */ |
340 | if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) { | 342 | if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) { |
341 | fi->ext.len++; | 343 | fi->ext.len++; |
342 | goto end_update; | 344 | goto end_update; |
343 | } | 345 | } |
@@ -376,6 +378,10 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) | |||
376 | struct dnode_of_data dn; | 378 | struct dnode_of_data dn; |
377 | struct page *page; | 379 | struct page *page; |
378 | int err; | 380 | int err; |
381 | struct f2fs_io_info fio = { | ||
382 | .type = DATA, | ||
383 | .rw = sync ? READ_SYNC : READA, | ||
384 | }; | ||
379 | 385 | ||
380 | page = find_get_page(mapping, index); | 386 | page = find_get_page(mapping, index); |
381 | if (page && PageUptodate(page)) | 387 | if (page && PageUptodate(page)) |
@@ -404,8 +410,8 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) | |||
404 | return page; | 410 | return page; |
405 | } | 411 | } |
406 | 412 | ||
407 | err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, dn.data_blkaddr, | 413 | fio.blk_addr = dn.data_blkaddr; |
408 | sync ? READ_SYNC : READA); | 414 | err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); |
409 | if (err) | 415 | if (err) |
410 | return ERR_PTR(err); | 416 | return ERR_PTR(err); |
411 | 417 | ||
@@ -430,7 +436,10 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) | |||
430 | struct dnode_of_data dn; | 436 | struct dnode_of_data dn; |
431 | struct page *page; | 437 | struct page *page; |
432 | int err; | 438 | int err; |
433 | 439 | struct f2fs_io_info fio = { | |
440 | .type = DATA, | ||
441 | .rw = READ_SYNC, | ||
442 | }; | ||
434 | repeat: | 443 | repeat: |
435 | page = grab_cache_page(mapping, index); | 444 | page = grab_cache_page(mapping, index); |
436 | if (!page) | 445 | if (!page) |
@@ -464,8 +473,8 @@ repeat: | |||
464 | return page; | 473 | return page; |
465 | } | 474 | } |
466 | 475 | ||
467 | err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, | 476 | fio.blk_addr = dn.data_blkaddr; |
468 | dn.data_blkaddr, READ_SYNC); | 477 | err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); |
469 | if (err) | 478 | if (err) |
470 | return ERR_PTR(err); | 479 | return ERR_PTR(err); |
471 | 480 | ||
@@ -515,8 +524,12 @@ repeat: | |||
515 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); | 524 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); |
516 | SetPageUptodate(page); | 525 | SetPageUptodate(page); |
517 | } else { | 526 | } else { |
518 | err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, | 527 | struct f2fs_io_info fio = { |
519 | dn.data_blkaddr, READ_SYNC); | 528 | .type = DATA, |
529 | .rw = READ_SYNC, | ||
530 | .blk_addr = dn.data_blkaddr, | ||
531 | }; | ||
532 | err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); | ||
520 | if (err) | 533 | if (err) |
521 | goto put_err; | 534 | goto put_err; |
522 | 535 | ||
@@ -550,30 +563,25 @@ static int __allocate_data_block(struct dnode_of_data *dn) | |||
550 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); | 563 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
551 | struct f2fs_inode_info *fi = F2FS_I(dn->inode); | 564 | struct f2fs_inode_info *fi = F2FS_I(dn->inode); |
552 | struct f2fs_summary sum; | 565 | struct f2fs_summary sum; |
553 | block_t new_blkaddr; | ||
554 | struct node_info ni; | 566 | struct node_info ni; |
567 | int seg = CURSEG_WARM_DATA; | ||
555 | pgoff_t fofs; | 568 | pgoff_t fofs; |
556 | int type; | ||
557 | 569 | ||
558 | if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) | 570 | if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) |
559 | return -EPERM; | 571 | return -EPERM; |
560 | if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) | 572 | if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) |
561 | return -ENOSPC; | 573 | return -ENOSPC; |
562 | 574 | ||
563 | __set_data_blkaddr(dn, NEW_ADDR); | ||
564 | dn->data_blkaddr = NEW_ADDR; | ||
565 | |||
566 | get_node_info(sbi, dn->nid, &ni); | 575 | get_node_info(sbi, dn->nid, &ni); |
567 | set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); | 576 | set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); |
568 | 577 | ||
569 | type = CURSEG_WARM_DATA; | 578 | if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) |
579 | seg = CURSEG_DIRECT_IO; | ||
570 | 580 | ||
571 | allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type); | 581 | allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg); |
572 | 582 | ||
573 | /* direct IO doesn't use extent cache to maximize the performance */ | 583 | /* direct IO doesn't use extent cache to maximize the performance */ |
574 | set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); | 584 | __set_data_blkaddr(dn); |
575 | update_extent_cache(new_blkaddr, dn); | ||
576 | clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); | ||
577 | 585 | ||
578 | /* update i_size */ | 586 | /* update i_size */ |
579 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + | 587 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + |
@@ -581,10 +589,59 @@ static int __allocate_data_block(struct dnode_of_data *dn) | |||
581 | if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT)) | 589 | if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT)) |
582 | i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); | 590 | i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); |
583 | 591 | ||
584 | dn->data_blkaddr = new_blkaddr; | ||
585 | return 0; | 592 | return 0; |
586 | } | 593 | } |
587 | 594 | ||
595 | static void __allocate_data_blocks(struct inode *inode, loff_t offset, | ||
596 | size_t count) | ||
597 | { | ||
598 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
599 | struct dnode_of_data dn; | ||
600 | u64 start = F2FS_BYTES_TO_BLK(offset); | ||
601 | u64 len = F2FS_BYTES_TO_BLK(count); | ||
602 | bool allocated; | ||
603 | u64 end_offset; | ||
604 | |||
605 | while (len) { | ||
606 | f2fs_balance_fs(sbi); | ||
607 | f2fs_lock_op(sbi); | ||
608 | |||
609 | /* When reading holes, we need its node page */ | ||
610 | set_new_dnode(&dn, inode, NULL, NULL, 0); | ||
611 | if (get_dnode_of_data(&dn, start, ALLOC_NODE)) | ||
612 | goto out; | ||
613 | |||
614 | allocated = false; | ||
615 | end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); | ||
616 | |||
617 | while (dn.ofs_in_node < end_offset && len) { | ||
618 | if (dn.data_blkaddr == NULL_ADDR) { | ||
619 | if (__allocate_data_block(&dn)) | ||
620 | goto sync_out; | ||
621 | allocated = true; | ||
622 | } | ||
623 | len--; | ||
624 | start++; | ||
625 | dn.ofs_in_node++; | ||
626 | } | ||
627 | |||
628 | if (allocated) | ||
629 | sync_inode_page(&dn); | ||
630 | |||
631 | f2fs_put_dnode(&dn); | ||
632 | f2fs_unlock_op(sbi); | ||
633 | } | ||
634 | return; | ||
635 | |||
636 | sync_out: | ||
637 | if (allocated) | ||
638 | sync_inode_page(&dn); | ||
639 | f2fs_put_dnode(&dn); | ||
640 | out: | ||
641 | f2fs_unlock_op(sbi); | ||
642 | return; | ||
643 | } | ||
644 | |||
588 | /* | 645 | /* |
589 | * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh. | 646 | * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh. |
590 | * If original data blocks are allocated, then give them to blockdev. | 647 | * If original data blocks are allocated, then give them to blockdev. |
@@ -610,10 +667,8 @@ static int __get_data_block(struct inode *inode, sector_t iblock, | |||
610 | if (check_extent_cache(inode, pgofs, bh_result)) | 667 | if (check_extent_cache(inode, pgofs, bh_result)) |
611 | goto out; | 668 | goto out; |
612 | 669 | ||
613 | if (create) { | 670 | if (create) |
614 | f2fs_balance_fs(F2FS_I_SB(inode)); | ||
615 | f2fs_lock_op(F2FS_I_SB(inode)); | 671 | f2fs_lock_op(F2FS_I_SB(inode)); |
616 | } | ||
617 | 672 | ||
618 | /* When reading holes, we need its node page */ | 673 | /* When reading holes, we need its node page */ |
619 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 674 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
@@ -627,12 +682,14 @@ static int __get_data_block(struct inode *inode, sector_t iblock, | |||
627 | goto put_out; | 682 | goto put_out; |
628 | 683 | ||
629 | if (dn.data_blkaddr != NULL_ADDR) { | 684 | if (dn.data_blkaddr != NULL_ADDR) { |
685 | set_buffer_new(bh_result); | ||
630 | map_bh(bh_result, inode->i_sb, dn.data_blkaddr); | 686 | map_bh(bh_result, inode->i_sb, dn.data_blkaddr); |
631 | } else if (create) { | 687 | } else if (create) { |
632 | err = __allocate_data_block(&dn); | 688 | err = __allocate_data_block(&dn); |
633 | if (err) | 689 | if (err) |
634 | goto put_out; | 690 | goto put_out; |
635 | allocated = true; | 691 | allocated = true; |
692 | set_buffer_new(bh_result); | ||
636 | map_bh(bh_result, inode->i_sb, dn.data_blkaddr); | 693 | map_bh(bh_result, inode->i_sb, dn.data_blkaddr); |
637 | } else { | 694 | } else { |
638 | goto put_out; | 695 | goto put_out; |
@@ -745,7 +802,6 @@ static int f2fs_read_data_pages(struct file *file, | |||
745 | int do_write_data_page(struct page *page, struct f2fs_io_info *fio) | 802 | int do_write_data_page(struct page *page, struct f2fs_io_info *fio) |
746 | { | 803 | { |
747 | struct inode *inode = page->mapping->host; | 804 | struct inode *inode = page->mapping->host; |
748 | block_t old_blkaddr, new_blkaddr; | ||
749 | struct dnode_of_data dn; | 805 | struct dnode_of_data dn; |
750 | int err = 0; | 806 | int err = 0; |
751 | 807 | ||
@@ -754,10 +810,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) | |||
754 | if (err) | 810 | if (err) |
755 | return err; | 811 | return err; |
756 | 812 | ||
757 | old_blkaddr = dn.data_blkaddr; | 813 | fio->blk_addr = dn.data_blkaddr; |
758 | 814 | ||
759 | /* This page is already truncated */ | 815 | /* This page is already truncated */ |
760 | if (old_blkaddr == NULL_ADDR) | 816 | if (fio->blk_addr == NULL_ADDR) |
761 | goto out_writepage; | 817 | goto out_writepage; |
762 | 818 | ||
763 | set_page_writeback(page); | 819 | set_page_writeback(page); |
@@ -766,14 +822,14 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) | |||
766 | * If current allocation needs SSR, | 822 | * If current allocation needs SSR, |
767 | * it had better in-place writes for updated data. | 823 | * it had better in-place writes for updated data. |
768 | */ | 824 | */ |
769 | if (unlikely(old_blkaddr != NEW_ADDR && | 825 | if (unlikely(fio->blk_addr != NEW_ADDR && |
770 | !is_cold_data(page) && | 826 | !is_cold_data(page) && |
771 | need_inplace_update(inode))) { | 827 | need_inplace_update(inode))) { |
772 | rewrite_data_page(page, old_blkaddr, fio); | 828 | rewrite_data_page(page, fio); |
773 | set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); | 829 | set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); |
774 | } else { | 830 | } else { |
775 | write_data_page(page, &dn, &new_blkaddr, fio); | 831 | write_data_page(page, &dn, fio); |
776 | update_extent_cache(new_blkaddr, &dn); | 832 | update_extent_cache(&dn); |
777 | set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); | 833 | set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); |
778 | } | 834 | } |
779 | out_writepage: | 835 | out_writepage: |
@@ -812,7 +868,12 @@ static int f2fs_write_data_page(struct page *page, | |||
812 | 868 | ||
813 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); | 869 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); |
814 | write: | 870 | write: |
815 | if (unlikely(sbi->por_doing)) | 871 | if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) |
872 | goto redirty_out; | ||
873 | if (f2fs_is_drop_cache(inode)) | ||
874 | goto out; | ||
875 | if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim && | ||
876 | available_free_memory(sbi, BASE_CHECK)) | ||
816 | goto redirty_out; | 877 | goto redirty_out; |
817 | 878 | ||
818 | /* Dentry blocks are controlled by checkpoint */ | 879 | /* Dentry blocks are controlled by checkpoint */ |
@@ -826,7 +887,6 @@ write: | |||
826 | /* we should bypass data pages to proceed the kworkder jobs */ | 887 | /* we should bypass data pages to proceed the kworkder jobs */ |
827 | if (unlikely(f2fs_cp_error(sbi))) { | 888 | if (unlikely(f2fs_cp_error(sbi))) { |
828 | SetPageError(page); | 889 | SetPageError(page); |
829 | unlock_page(page); | ||
830 | goto out; | 890 | goto out; |
831 | } | 891 | } |
832 | 892 | ||
@@ -1002,8 +1062,12 @@ put_next: | |||
1002 | if (dn.data_blkaddr == NEW_ADDR) { | 1062 | if (dn.data_blkaddr == NEW_ADDR) { |
1003 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); | 1063 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); |
1004 | } else { | 1064 | } else { |
1005 | err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, | 1065 | struct f2fs_io_info fio = { |
1006 | READ_SYNC); | 1066 | .type = DATA, |
1067 | .rw = READ_SYNC, | ||
1068 | .blk_addr = dn.data_blkaddr, | ||
1069 | }; | ||
1070 | err = f2fs_submit_page_bio(sbi, page, &fio); | ||
1007 | if (err) | 1071 | if (err) |
1008 | goto fail; | 1072 | goto fail; |
1009 | 1073 | ||
@@ -1092,6 +1156,9 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, | |||
1092 | 1156 | ||
1093 | trace_f2fs_direct_IO_enter(inode, offset, count, rw); | 1157 | trace_f2fs_direct_IO_enter(inode, offset, count, rw); |
1094 | 1158 | ||
1159 | if (rw & WRITE) | ||
1160 | __allocate_data_blocks(inode, offset, count); | ||
1161 | |||
1095 | err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); | 1162 | err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); |
1096 | if (err < 0 && (rw & WRITE)) | 1163 | if (err < 0 && (rw & WRITE)) |
1097 | f2fs_write_failed(mapping, offset + count); | 1164 | f2fs_write_failed(mapping, offset + count); |
@@ -1101,24 +1168,33 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, | |||
1101 | return err; | 1168 | return err; |
1102 | } | 1169 | } |
1103 | 1170 | ||
1104 | static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, | 1171 | void f2fs_invalidate_page(struct page *page, unsigned int offset, |
1105 | unsigned int length) | 1172 | unsigned int length) |
1106 | { | 1173 | { |
1107 | struct inode *inode = page->mapping->host; | 1174 | struct inode *inode = page->mapping->host; |
1175 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
1108 | 1176 | ||
1109 | if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) | 1177 | if (inode->i_ino >= F2FS_ROOT_INO(sbi) && |
1178 | (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE)) | ||
1110 | return; | 1179 | return; |
1111 | 1180 | ||
1112 | if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) | 1181 | if (PageDirty(page)) { |
1113 | invalidate_inmem_page(inode, page); | 1182 | if (inode->i_ino == F2FS_META_INO(sbi)) |
1114 | 1183 | dec_page_count(sbi, F2FS_DIRTY_META); | |
1115 | if (PageDirty(page)) | 1184 | else if (inode->i_ino == F2FS_NODE_INO(sbi)) |
1116 | inode_dec_dirty_pages(inode); | 1185 | dec_page_count(sbi, F2FS_DIRTY_NODES); |
1186 | else | ||
1187 | inode_dec_dirty_pages(inode); | ||
1188 | } | ||
1117 | ClearPagePrivate(page); | 1189 | ClearPagePrivate(page); |
1118 | } | 1190 | } |
1119 | 1191 | ||
1120 | static int f2fs_release_data_page(struct page *page, gfp_t wait) | 1192 | int f2fs_release_page(struct page *page, gfp_t wait) |
1121 | { | 1193 | { |
1194 | /* If this is dirty page, keep PagePrivate */ | ||
1195 | if (PageDirty(page)) | ||
1196 | return 0; | ||
1197 | |||
1122 | ClearPagePrivate(page); | 1198 | ClearPagePrivate(page); |
1123 | return 1; | 1199 | return 1; |
1124 | } | 1200 | } |
@@ -1132,7 +1208,7 @@ static int f2fs_set_data_page_dirty(struct page *page) | |||
1132 | 1208 | ||
1133 | SetPageUptodate(page); | 1209 | SetPageUptodate(page); |
1134 | 1210 | ||
1135 | if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) { | 1211 | if (f2fs_is_atomic_file(inode)) { |
1136 | register_inmem_page(inode, page); | 1212 | register_inmem_page(inode, page); |
1137 | return 1; | 1213 | return 1; |
1138 | } | 1214 | } |
@@ -1168,8 +1244,8 @@ const struct address_space_operations f2fs_dblock_aops = { | |||
1168 | .write_begin = f2fs_write_begin, | 1244 | .write_begin = f2fs_write_begin, |
1169 | .write_end = f2fs_write_end, | 1245 | .write_end = f2fs_write_end, |
1170 | .set_page_dirty = f2fs_set_data_page_dirty, | 1246 | .set_page_dirty = f2fs_set_data_page_dirty, |
1171 | .invalidatepage = f2fs_invalidate_data_page, | 1247 | .invalidatepage = f2fs_invalidate_page, |
1172 | .releasepage = f2fs_release_data_page, | 1248 | .releasepage = f2fs_release_page, |
1173 | .direct_IO = f2fs_direct_IO, | 1249 | .direct_IO = f2fs_direct_IO, |
1174 | .bmap = f2fs_bmap, | 1250 | .bmap = f2fs_bmap, |
1175 | }; | 1251 | }; |
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 91e8f699ab30..e671373cc8ab 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c | |||
@@ -40,6 +40,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) | |||
40 | si->ndirty_dirs = sbi->n_dirty_dirs; | 40 | si->ndirty_dirs = sbi->n_dirty_dirs; |
41 | si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); | 41 | si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); |
42 | si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); | 42 | si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); |
43 | si->wb_pages = get_pages(sbi, F2FS_WRITEBACK); | ||
43 | si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; | 44 | si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; |
44 | si->rsvd_segs = reserved_segments(sbi); | 45 | si->rsvd_segs = reserved_segments(sbi); |
45 | si->overp_segs = overprovision_segments(sbi); | 46 | si->overp_segs = overprovision_segments(sbi); |
@@ -57,7 +58,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) | |||
57 | si->node_pages = NODE_MAPPING(sbi)->nrpages; | 58 | si->node_pages = NODE_MAPPING(sbi)->nrpages; |
58 | si->meta_pages = META_MAPPING(sbi)->nrpages; | 59 | si->meta_pages = META_MAPPING(sbi)->nrpages; |
59 | si->nats = NM_I(sbi)->nat_cnt; | 60 | si->nats = NM_I(sbi)->nat_cnt; |
60 | si->sits = SIT_I(sbi)->dirty_sentries; | 61 | si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; |
62 | si->sits = MAIN_SEGS(sbi); | ||
63 | si->dirty_sits = SIT_I(sbi)->dirty_sentries; | ||
61 | si->fnids = NM_I(sbi)->fcnt; | 64 | si->fnids = NM_I(sbi)->fcnt; |
62 | si->bg_gc = sbi->bg_gc; | 65 | si->bg_gc = sbi->bg_gc; |
63 | si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) | 66 | si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) |
@@ -79,6 +82,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) | |||
79 | si->segment_count[i] = sbi->segment_count[i]; | 82 | si->segment_count[i] = sbi->segment_count[i]; |
80 | si->block_count[i] = sbi->block_count[i]; | 83 | si->block_count[i] = sbi->block_count[i]; |
81 | } | 84 | } |
85 | |||
86 | si->inplace_count = atomic_read(&sbi->inplace_count); | ||
82 | } | 87 | } |
83 | 88 | ||
84 | /* | 89 | /* |
@@ -137,6 +142,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) | |||
137 | si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); | 142 | si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); |
138 | si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); | 143 | si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); |
139 | si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); | 144 | si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); |
145 | si->base_mem += SIT_VBLOCK_MAP_SIZE; | ||
140 | if (sbi->segs_per_sec > 1) | 146 | if (sbi->segs_per_sec > 1) |
141 | si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); | 147 | si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); |
142 | si->base_mem += __bitmap_size(sbi, SIT_BITMAP); | 148 | si->base_mem += __bitmap_size(sbi, SIT_BITMAP); |
@@ -159,20 +165,32 @@ static void update_mem_info(struct f2fs_sb_info *sbi) | |||
159 | si->base_mem += sizeof(struct f2fs_nm_info); | 165 | si->base_mem += sizeof(struct f2fs_nm_info); |
160 | si->base_mem += __bitmap_size(sbi, NAT_BITMAP); | 166 | si->base_mem += __bitmap_size(sbi, NAT_BITMAP); |
161 | 167 | ||
168 | get_cache: | ||
169 | si->cache_mem = 0; | ||
170 | |||
162 | /* build gc */ | 171 | /* build gc */ |
163 | si->base_mem += sizeof(struct f2fs_gc_kthread); | 172 | if (sbi->gc_thread) |
173 | si->cache_mem += sizeof(struct f2fs_gc_kthread); | ||
174 | |||
175 | /* build merge flush thread */ | ||
176 | if (SM_I(sbi)->cmd_control_info) | ||
177 | si->cache_mem += sizeof(struct flush_cmd_control); | ||
164 | 178 | ||
165 | get_cache: | ||
166 | /* free nids */ | 179 | /* free nids */ |
167 | si->cache_mem = NM_I(sbi)->fcnt; | 180 | si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid); |
168 | si->cache_mem += NM_I(sbi)->nat_cnt; | 181 | si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); |
169 | npages = NODE_MAPPING(sbi)->nrpages; | 182 | si->cache_mem += NM_I(sbi)->dirty_nat_cnt * |
170 | si->cache_mem += npages << PAGE_CACHE_SHIFT; | 183 | sizeof(struct nat_entry_set); |
171 | npages = META_MAPPING(sbi)->nrpages; | 184 | si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); |
172 | si->cache_mem += npages << PAGE_CACHE_SHIFT; | 185 | si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); |
173 | si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); | ||
174 | for (i = 0; i <= UPDATE_INO; i++) | 186 | for (i = 0; i <= UPDATE_INO; i++) |
175 | si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); | 187 | si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); |
188 | |||
189 | si->page_mem = 0; | ||
190 | npages = NODE_MAPPING(sbi)->nrpages; | ||
191 | si->page_mem += npages << PAGE_CACHE_SHIFT; | ||
192 | npages = META_MAPPING(sbi)->nrpages; | ||
193 | si->page_mem += npages << PAGE_CACHE_SHIFT; | ||
176 | } | 194 | } |
177 | 195 | ||
178 | static int stat_show(struct seq_file *s, void *v) | 196 | static int stat_show(struct seq_file *s, void *v) |
@@ -250,16 +268,16 @@ static int stat_show(struct seq_file *s, void *v) | |||
250 | seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", | 268 | seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", |
251 | si->hit_ext, si->total_ext); | 269 | si->hit_ext, si->total_ext); |
252 | seq_puts(s, "\nBalancing F2FS Async:\n"); | 270 | seq_puts(s, "\nBalancing F2FS Async:\n"); |
253 | seq_printf(s, " - inmem: %4d\n", | 271 | seq_printf(s, " - inmem: %4d, wb: %4d\n", |
254 | si->inmem_pages); | 272 | si->inmem_pages, si->wb_pages); |
255 | seq_printf(s, " - nodes: %4d in %4d\n", | 273 | seq_printf(s, " - nodes: %4d in %4d\n", |
256 | si->ndirty_node, si->node_pages); | 274 | si->ndirty_node, si->node_pages); |
257 | seq_printf(s, " - dents: %4d in dirs:%4d\n", | 275 | seq_printf(s, " - dents: %4d in dirs:%4d\n", |
258 | si->ndirty_dent, si->ndirty_dirs); | 276 | si->ndirty_dent, si->ndirty_dirs); |
259 | seq_printf(s, " - meta: %4d in %4d\n", | 277 | seq_printf(s, " - meta: %4d in %4d\n", |
260 | si->ndirty_meta, si->meta_pages); | 278 | si->ndirty_meta, si->meta_pages); |
261 | seq_printf(s, " - NATs: %9d\n - SITs: %9d\n", | 279 | seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", |
262 | si->nats, si->sits); | 280 | si->dirty_nats, si->nats, si->dirty_sits, si->sits); |
263 | seq_printf(s, " - free_nids: %9d\n", | 281 | seq_printf(s, " - free_nids: %9d\n", |
264 | si->fnids); | 282 | si->fnids); |
265 | seq_puts(s, "\nDistribution of User Blocks:"); | 283 | seq_puts(s, "\nDistribution of User Blocks:"); |
@@ -277,6 +295,7 @@ static int stat_show(struct seq_file *s, void *v) | |||
277 | for (j = 0; j < si->util_free; j++) | 295 | for (j = 0; j < si->util_free; j++) |
278 | seq_putc(s, '-'); | 296 | seq_putc(s, '-'); |
279 | seq_puts(s, "]\n\n"); | 297 | seq_puts(s, "]\n\n"); |
298 | seq_printf(s, "IPU: %u blocks\n", si->inplace_count); | ||
280 | seq_printf(s, "SSR: %u blocks in %u segments\n", | 299 | seq_printf(s, "SSR: %u blocks in %u segments\n", |
281 | si->block_count[SSR], si->segment_count[SSR]); | 300 | si->block_count[SSR], si->segment_count[SSR]); |
282 | seq_printf(s, "LFS: %u blocks in %u segments\n", | 301 | seq_printf(s, "LFS: %u blocks in %u segments\n", |
@@ -289,9 +308,14 @@ static int stat_show(struct seq_file *s, void *v) | |||
289 | 308 | ||
290 | /* memory footprint */ | 309 | /* memory footprint */ |
291 | update_mem_info(si->sbi); | 310 | update_mem_info(si->sbi); |
292 | seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n", | 311 | seq_printf(s, "\nMemory: %u KB\n", |
293 | (si->base_mem + si->cache_mem) >> 10, | 312 | (si->base_mem + si->cache_mem + si->page_mem) >> 10); |
294 | si->base_mem >> 10, si->cache_mem >> 10); | 313 | seq_printf(s, " - static: %u KB\n", |
314 | si->base_mem >> 10); | ||
315 | seq_printf(s, " - cached: %u KB\n", | ||
316 | si->cache_mem >> 10); | ||
317 | seq_printf(s, " - paged : %u KB\n", | ||
318 | si->page_mem >> 10); | ||
295 | } | 319 | } |
296 | mutex_unlock(&f2fs_stat_mutex); | 320 | mutex_unlock(&f2fs_stat_mutex); |
297 | return 0; | 321 | return 0; |
@@ -331,6 +355,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) | |||
331 | 355 | ||
332 | atomic_set(&sbi->inline_inode, 0); | 356 | atomic_set(&sbi->inline_inode, 0); |
333 | atomic_set(&sbi->inline_dir, 0); | 357 | atomic_set(&sbi->inline_dir, 0); |
358 | atomic_set(&sbi->inplace_count, 0); | ||
334 | 359 | ||
335 | mutex_lock(&f2fs_stat_mutex); | 360 | mutex_lock(&f2fs_stat_mutex); |
336 | list_add_tail(&si->stat_list, &f2fs_stat_list); | 361 | list_add_tail(&si->stat_list, &f2fs_stat_list); |
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b1a7d5737cd0..b74097a7f6d9 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c | |||
@@ -286,8 +286,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, | |||
286 | f2fs_wait_on_page_writeback(page, type); | 286 | f2fs_wait_on_page_writeback(page, type); |
287 | de->ino = cpu_to_le32(inode->i_ino); | 287 | de->ino = cpu_to_le32(inode->i_ino); |
288 | set_de_type(de, inode); | 288 | set_de_type(de, inode); |
289 | if (!f2fs_has_inline_dentry(dir)) | 289 | f2fs_dentry_kunmap(dir, page); |
290 | kunmap(page); | ||
291 | set_page_dirty(page); | 290 | set_page_dirty(page); |
292 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 291 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
293 | mark_inode_dirty(dir); | 292 | mark_inode_dirty(dir); |
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ec58bb2373fc..7fa3313ab0e2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h | |||
@@ -28,7 +28,7 @@ | |||
28 | do { \ | 28 | do { \ |
29 | if (unlikely(condition)) { \ | 29 | if (unlikely(condition)) { \ |
30 | WARN_ON(1); \ | 30 | WARN_ON(1); \ |
31 | sbi->need_fsck = true; \ | 31 | set_sbi_flag(sbi, SBI_NEED_FSCK); \ |
32 | } \ | 32 | } \ |
33 | } while (0) | 33 | } while (0) |
34 | #define f2fs_down_write(x, y) down_write(x) | 34 | #define f2fs_down_write(x, y) down_write(x) |
@@ -100,10 +100,15 @@ enum { | |||
100 | 100 | ||
101 | enum { | 101 | enum { |
102 | CP_UMOUNT, | 102 | CP_UMOUNT, |
103 | CP_FASTBOOT, | ||
103 | CP_SYNC, | 104 | CP_SYNC, |
104 | CP_DISCARD, | 105 | CP_DISCARD, |
105 | }; | 106 | }; |
106 | 107 | ||
108 | #define DEF_BATCHED_TRIM_SECTIONS 32 | ||
109 | #define BATCHED_TRIM_SEGMENTS(sbi) \ | ||
110 | (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) | ||
111 | |||
107 | struct cp_control { | 112 | struct cp_control { |
108 | int reason; | 113 | int reason; |
109 | __u64 trim_start; | 114 | __u64 trim_start; |
@@ -136,8 +141,14 @@ struct ino_entry { | |||
136 | nid_t ino; /* inode number */ | 141 | nid_t ino; /* inode number */ |
137 | }; | 142 | }; |
138 | 143 | ||
139 | /* for the list of directory inodes */ | 144 | /* |
140 | struct dir_inode_entry { | 145 | * for the list of directory inodes or gc inodes. |
146 | * NOTE: there are two slab users for this structure, if we add/modify/delete | ||
147 | * fields in structure for one of slab users, it may affect fields or size of | ||
148 | * other one, in this condition, it's better to split both of slab and related | ||
149 | * data structure. | ||
150 | */ | ||
151 | struct inode_entry { | ||
141 | struct list_head list; /* list head */ | 152 | struct list_head list; /* list head */ |
142 | struct inode *inode; /* vfs inode pointer */ | 153 | struct inode *inode; /* vfs inode pointer */ |
143 | }; | 154 | }; |
@@ -196,11 +207,14 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, | |||
196 | */ | 207 | */ |
197 | #define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS | 208 | #define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS |
198 | #define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS | 209 | #define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS |
210 | #define F2FS_IOC_GETVERSION FS_IOC_GETVERSION | ||
199 | 211 | ||
200 | #define F2FS_IOCTL_MAGIC 0xf5 | 212 | #define F2FS_IOCTL_MAGIC 0xf5 |
201 | #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) | 213 | #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) |
202 | #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) | 214 | #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) |
203 | #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) | 215 | #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) |
216 | #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) | ||
217 | #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) | ||
204 | 218 | ||
205 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | 219 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) |
206 | /* | 220 | /* |
@@ -295,7 +309,7 @@ struct f2fs_inode_info { | |||
295 | nid_t i_xattr_nid; /* node id that contains xattrs */ | 309 | nid_t i_xattr_nid; /* node id that contains xattrs */ |
296 | unsigned long long xattr_ver; /* cp version of xattr modification */ | 310 | unsigned long long xattr_ver; /* cp version of xattr modification */ |
297 | struct extent_info ext; /* in-memory extent cache entry */ | 311 | struct extent_info ext; /* in-memory extent cache entry */ |
298 | struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ | 312 | struct inode_entry *dirty_dir; /* the pointer of dirty dir */ |
299 | 313 | ||
300 | struct radix_tree_root inmem_root; /* radix tree for inmem pages */ | 314 | struct radix_tree_root inmem_root; /* radix tree for inmem pages */ |
301 | struct list_head inmem_pages; /* inmemory pages managed by f2fs */ | 315 | struct list_head inmem_pages; /* inmemory pages managed by f2fs */ |
@@ -398,7 +412,8 @@ enum { | |||
398 | CURSEG_HOT_NODE, /* direct node blocks of directory files */ | 412 | CURSEG_HOT_NODE, /* direct node blocks of directory files */ |
399 | CURSEG_WARM_NODE, /* direct node blocks of normal files */ | 413 | CURSEG_WARM_NODE, /* direct node blocks of normal files */ |
400 | CURSEG_COLD_NODE, /* indirect node blocks */ | 414 | CURSEG_COLD_NODE, /* indirect node blocks */ |
401 | NO_CHECK_TYPE | 415 | NO_CHECK_TYPE, |
416 | CURSEG_DIRECT_IO, /* to use for the direct IO path */ | ||
402 | }; | 417 | }; |
403 | 418 | ||
404 | struct flush_cmd { | 419 | struct flush_cmd { |
@@ -437,6 +452,9 @@ struct f2fs_sm_info { | |||
437 | int nr_discards; /* # of discards in the list */ | 452 | int nr_discards; /* # of discards in the list */ |
438 | int max_discards; /* max. discards to be issued */ | 453 | int max_discards; /* max. discards to be issued */ |
439 | 454 | ||
455 | /* for batched trimming */ | ||
456 | unsigned int trim_sections; /* # of sections to trim */ | ||
457 | |||
440 | struct list_head sit_entry_set; /* sit entry set list */ | 458 | struct list_head sit_entry_set; /* sit entry set list */ |
441 | 459 | ||
442 | unsigned int ipu_policy; /* in-place-update policy */ | 460 | unsigned int ipu_policy; /* in-place-update policy */ |
@@ -489,6 +507,7 @@ enum page_type { | |||
489 | struct f2fs_io_info { | 507 | struct f2fs_io_info { |
490 | enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ | 508 | enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ |
491 | int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */ | 509 | int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */ |
510 | block_t blk_addr; /* block address to be written */ | ||
492 | }; | 511 | }; |
493 | 512 | ||
494 | #define is_read_io(rw) (((rw) & 1) == READ) | 513 | #define is_read_io(rw) (((rw) & 1) == READ) |
@@ -508,13 +527,20 @@ struct inode_management { | |||
508 | unsigned long ino_num; /* number of entries */ | 527 | unsigned long ino_num; /* number of entries */ |
509 | }; | 528 | }; |
510 | 529 | ||
530 | /* For s_flag in struct f2fs_sb_info */ | ||
531 | enum { | ||
532 | SBI_IS_DIRTY, /* dirty flag for checkpoint */ | ||
533 | SBI_IS_CLOSE, /* specify unmounting */ | ||
534 | SBI_NEED_FSCK, /* need fsck.f2fs to fix */ | ||
535 | SBI_POR_DOING, /* recovery is doing or not */ | ||
536 | }; | ||
537 | |||
511 | struct f2fs_sb_info { | 538 | struct f2fs_sb_info { |
512 | struct super_block *sb; /* pointer to VFS super block */ | 539 | struct super_block *sb; /* pointer to VFS super block */ |
513 | struct proc_dir_entry *s_proc; /* proc entry */ | 540 | struct proc_dir_entry *s_proc; /* proc entry */ |
514 | struct buffer_head *raw_super_buf; /* buffer head of raw sb */ | 541 | struct buffer_head *raw_super_buf; /* buffer head of raw sb */ |
515 | struct f2fs_super_block *raw_super; /* raw super block pointer */ | 542 | struct f2fs_super_block *raw_super; /* raw super block pointer */ |
516 | int s_dirty; /* dirty flag for checkpoint */ | 543 | int s_flag; /* flags for sbi */ |
517 | bool need_fsck; /* need fsck.f2fs to fix */ | ||
518 | 544 | ||
519 | /* for node-related operations */ | 545 | /* for node-related operations */ |
520 | struct f2fs_nm_info *nm_info; /* node manager */ | 546 | struct f2fs_nm_info *nm_info; /* node manager */ |
@@ -534,7 +560,6 @@ struct f2fs_sb_info { | |||
534 | struct rw_semaphore cp_rwsem; /* blocking FS operations */ | 560 | struct rw_semaphore cp_rwsem; /* blocking FS operations */ |
535 | struct rw_semaphore node_write; /* locking node writes */ | 561 | struct rw_semaphore node_write; /* locking node writes */ |
536 | struct mutex writepages; /* mutex for writepages() */ | 562 | struct mutex writepages; /* mutex for writepages() */ |
537 | bool por_doing; /* recovery is doing or not */ | ||
538 | wait_queue_head_t cp_wait; | 563 | wait_queue_head_t cp_wait; |
539 | 564 | ||
540 | struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ | 565 | struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ |
@@ -589,6 +614,7 @@ struct f2fs_sb_info { | |||
589 | struct f2fs_stat_info *stat_info; /* FS status information */ | 614 | struct f2fs_stat_info *stat_info; /* FS status information */ |
590 | unsigned int segment_count[2]; /* # of allocated segments */ | 615 | unsigned int segment_count[2]; /* # of allocated segments */ |
591 | unsigned int block_count[2]; /* # of allocated blocks */ | 616 | unsigned int block_count[2]; /* # of allocated blocks */ |
617 | atomic_t inplace_count; /* # of inplace update */ | ||
592 | int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ | 618 | int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ |
593 | atomic_t inline_inode; /* # of inline_data inodes */ | 619 | atomic_t inline_inode; /* # of inline_data inodes */ |
594 | atomic_t inline_dir; /* # of inline_dentry inodes */ | 620 | atomic_t inline_dir; /* # of inline_dentry inodes */ |
@@ -686,14 +712,19 @@ static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi) | |||
686 | return sbi->node_inode->i_mapping; | 712 | return sbi->node_inode->i_mapping; |
687 | } | 713 | } |
688 | 714 | ||
689 | static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) | 715 | static inline bool is_sbi_flag_set(struct f2fs_sb_info *sbi, unsigned int type) |
690 | { | 716 | { |
691 | sbi->s_dirty = 1; | 717 | return sbi->s_flag & (0x01 << type); |
692 | } | 718 | } |
693 | 719 | ||
694 | static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) | 720 | static inline void set_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type) |
695 | { | 721 | { |
696 | sbi->s_dirty = 0; | 722 | sbi->s_flag |= (0x01 << type); |
723 | } | ||
724 | |||
725 | static inline void clear_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type) | ||
726 | { | ||
727 | sbi->s_flag &= ~(0x01 << type); | ||
697 | } | 728 | } |
698 | 729 | ||
699 | static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) | 730 | static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) |
@@ -741,6 +772,28 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) | |||
741 | up_write(&sbi->cp_rwsem); | 772 | up_write(&sbi->cp_rwsem); |
742 | } | 773 | } |
743 | 774 | ||
775 | static inline int __get_cp_reason(struct f2fs_sb_info *sbi) | ||
776 | { | ||
777 | int reason = CP_SYNC; | ||
778 | |||
779 | if (test_opt(sbi, FASTBOOT)) | ||
780 | reason = CP_FASTBOOT; | ||
781 | if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) | ||
782 | reason = CP_UMOUNT; | ||
783 | return reason; | ||
784 | } | ||
785 | |||
786 | static inline bool __remain_node_summaries(int reason) | ||
787 | { | ||
788 | return (reason == CP_UMOUNT || reason == CP_FASTBOOT); | ||
789 | } | ||
790 | |||
791 | static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi) | ||
792 | { | ||
793 | return (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) || | ||
794 | is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FASTBOOT_FLAG)); | ||
795 | } | ||
796 | |||
744 | /* | 797 | /* |
745 | * Check whether the given nid is within node id range. | 798 | * Check whether the given nid is within node id range. |
746 | */ | 799 | */ |
@@ -805,7 +858,7 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, | |||
805 | static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) | 858 | static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) |
806 | { | 859 | { |
807 | atomic_inc(&sbi->nr_pages[count_type]); | 860 | atomic_inc(&sbi->nr_pages[count_type]); |
808 | F2FS_SET_SB_DIRT(sbi); | 861 | set_sbi_flag(sbi, SBI_IS_DIRTY); |
809 | } | 862 | } |
810 | 863 | ||
811 | static inline void inode_inc_dirty_pages(struct inode *inode) | 864 | static inline void inode_inc_dirty_pages(struct inode *inode) |
@@ -1113,6 +1166,7 @@ enum { | |||
1113 | FI_NEED_IPU, /* used for ipu per file */ | 1166 | FI_NEED_IPU, /* used for ipu per file */ |
1114 | FI_ATOMIC_FILE, /* indicate atomic file */ | 1167 | FI_ATOMIC_FILE, /* indicate atomic file */ |
1115 | FI_VOLATILE_FILE, /* indicate volatile file */ | 1168 | FI_VOLATILE_FILE, /* indicate volatile file */ |
1169 | FI_DROP_CACHE, /* drop dirty page cache */ | ||
1116 | FI_DATA_EXIST, /* indicate data exists */ | 1170 | FI_DATA_EXIST, /* indicate data exists */ |
1117 | }; | 1171 | }; |
1118 | 1172 | ||
@@ -1220,6 +1274,11 @@ static inline bool f2fs_is_volatile_file(struct inode *inode) | |||
1220 | return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); | 1274 | return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); |
1221 | } | 1275 | } |
1222 | 1276 | ||
1277 | static inline bool f2fs_is_drop_cache(struct inode *inode) | ||
1278 | { | ||
1279 | return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE); | ||
1280 | } | ||
1281 | |||
1223 | static inline void *inline_data_addr(struct page *page) | 1282 | static inline void *inline_data_addr(struct page *page) |
1224 | { | 1283 | { |
1225 | struct f2fs_inode *ri = F2FS_INODE(page); | 1284 | struct f2fs_inode *ri = F2FS_INODE(page); |
@@ -1389,7 +1448,6 @@ void destroy_node_manager_caches(void); | |||
1389 | * segment.c | 1448 | * segment.c |
1390 | */ | 1449 | */ |
1391 | void register_inmem_page(struct inode *, struct page *); | 1450 | void register_inmem_page(struct inode *, struct page *); |
1392 | void invalidate_inmem_page(struct inode *, struct page *); | ||
1393 | void commit_inmem_pages(struct inode *, bool); | 1451 | void commit_inmem_pages(struct inode *, bool); |
1394 | void f2fs_balance_fs(struct f2fs_sb_info *); | 1452 | void f2fs_balance_fs(struct f2fs_sb_info *); |
1395 | void f2fs_balance_fs_bg(struct f2fs_sb_info *); | 1453 | void f2fs_balance_fs_bg(struct f2fs_sb_info *); |
@@ -1401,16 +1459,16 @@ void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); | |||
1401 | void clear_prefree_segments(struct f2fs_sb_info *); | 1459 | void clear_prefree_segments(struct f2fs_sb_info *); |
1402 | void release_discard_addrs(struct f2fs_sb_info *); | 1460 | void release_discard_addrs(struct f2fs_sb_info *); |
1403 | void discard_next_dnode(struct f2fs_sb_info *, block_t); | 1461 | void discard_next_dnode(struct f2fs_sb_info *, block_t); |
1404 | int npages_for_summary_flush(struct f2fs_sb_info *); | 1462 | int npages_for_summary_flush(struct f2fs_sb_info *, bool); |
1405 | void allocate_new_segments(struct f2fs_sb_info *); | 1463 | void allocate_new_segments(struct f2fs_sb_info *); |
1406 | int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); | 1464 | int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); |
1407 | struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); | 1465 | struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); |
1408 | void write_meta_page(struct f2fs_sb_info *, struct page *); | 1466 | void write_meta_page(struct f2fs_sb_info *, struct page *); |
1409 | void write_node_page(struct f2fs_sb_info *, struct page *, | 1467 | void write_node_page(struct f2fs_sb_info *, struct page *, |
1410 | struct f2fs_io_info *, unsigned int, block_t, block_t *); | 1468 | unsigned int, struct f2fs_io_info *); |
1411 | void write_data_page(struct page *, struct dnode_of_data *, block_t *, | 1469 | void write_data_page(struct page *, struct dnode_of_data *, |
1412 | struct f2fs_io_info *); | 1470 | struct f2fs_io_info *); |
1413 | void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); | 1471 | void rewrite_data_page(struct page *, struct f2fs_io_info *); |
1414 | void recover_data_page(struct f2fs_sb_info *, struct page *, | 1472 | void recover_data_page(struct f2fs_sb_info *, struct page *, |
1415 | struct f2fs_summary *, block_t, block_t); | 1473 | struct f2fs_summary *, block_t, block_t); |
1416 | void allocate_data_block(struct f2fs_sb_info *, struct page *, | 1474 | void allocate_data_block(struct f2fs_sb_info *, struct page *, |
@@ -1457,17 +1515,20 @@ void destroy_checkpoint_caches(void); | |||
1457 | * data.c | 1515 | * data.c |
1458 | */ | 1516 | */ |
1459 | void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); | 1517 | void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); |
1460 | int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int); | 1518 | int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, |
1461 | void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t, | 1519 | struct f2fs_io_info *); |
1520 | void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, | ||
1462 | struct f2fs_io_info *); | 1521 | struct f2fs_io_info *); |
1463 | int reserve_new_block(struct dnode_of_data *); | 1522 | int reserve_new_block(struct dnode_of_data *); |
1464 | int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); | 1523 | int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); |
1465 | void update_extent_cache(block_t, struct dnode_of_data *); | 1524 | void update_extent_cache(struct dnode_of_data *); |
1466 | struct page *find_data_page(struct inode *, pgoff_t, bool); | 1525 | struct page *find_data_page(struct inode *, pgoff_t, bool); |
1467 | struct page *get_lock_data_page(struct inode *, pgoff_t); | 1526 | struct page *get_lock_data_page(struct inode *, pgoff_t); |
1468 | struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); | 1527 | struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); |
1469 | int do_write_data_page(struct page *, struct f2fs_io_info *); | 1528 | int do_write_data_page(struct page *, struct f2fs_io_info *); |
1470 | int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); | 1529 | int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); |
1530 | void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); | ||
1531 | int f2fs_release_page(struct page *, gfp_t); | ||
1471 | 1532 | ||
1472 | /* | 1533 | /* |
1473 | * gc.c | 1534 | * gc.c |
@@ -1477,8 +1538,6 @@ void stop_gc_thread(struct f2fs_sb_info *); | |||
1477 | block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); | 1538 | block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); |
1478 | int f2fs_gc(struct f2fs_sb_info *); | 1539 | int f2fs_gc(struct f2fs_sb_info *); |
1479 | void build_gc_manager(struct f2fs_sb_info *); | 1540 | void build_gc_manager(struct f2fs_sb_info *); |
1480 | int __init create_gc_caches(void); | ||
1481 | void destroy_gc_caches(void); | ||
1482 | 1541 | ||
1483 | /* | 1542 | /* |
1484 | * recovery.c | 1543 | * recovery.c |
@@ -1497,9 +1556,9 @@ struct f2fs_stat_info { | |||
1497 | int main_area_segs, main_area_sections, main_area_zones; | 1556 | int main_area_segs, main_area_sections, main_area_zones; |
1498 | int hit_ext, total_ext; | 1557 | int hit_ext, total_ext; |
1499 | int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; | 1558 | int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; |
1500 | int nats, sits, fnids; | 1559 | int nats, dirty_nats, sits, dirty_sits, fnids; |
1501 | int total_count, utilization; | 1560 | int total_count, utilization; |
1502 | int bg_gc, inline_inode, inline_dir, inmem_pages; | 1561 | int bg_gc, inline_inode, inline_dir, inmem_pages, wb_pages; |
1503 | unsigned int valid_count, valid_node_count, valid_inode_count; | 1562 | unsigned int valid_count, valid_node_count, valid_inode_count; |
1504 | unsigned int bimodal, avg_vblocks; | 1563 | unsigned int bimodal, avg_vblocks; |
1505 | int util_free, util_valid, util_invalid; | 1564 | int util_free, util_valid, util_invalid; |
@@ -1514,7 +1573,8 @@ struct f2fs_stat_info { | |||
1514 | 1573 | ||
1515 | unsigned int segment_count[2]; | 1574 | unsigned int segment_count[2]; |
1516 | unsigned int block_count[2]; | 1575 | unsigned int block_count[2]; |
1517 | unsigned base_mem, cache_mem; | 1576 | unsigned int inplace_count; |
1577 | unsigned base_mem, cache_mem, page_mem; | ||
1518 | }; | 1578 | }; |
1519 | 1579 | ||
1520 | static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) | 1580 | static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) |
@@ -1553,7 +1613,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) | |||
1553 | ((sbi)->segment_count[(curseg)->alloc_type]++) | 1613 | ((sbi)->segment_count[(curseg)->alloc_type]++) |
1554 | #define stat_inc_block_count(sbi, curseg) \ | 1614 | #define stat_inc_block_count(sbi, curseg) \ |
1555 | ((sbi)->block_count[(curseg)->alloc_type]++) | 1615 | ((sbi)->block_count[(curseg)->alloc_type]++) |
1556 | 1616 | #define stat_inc_inplace_blocks(sbi) \ | |
1617 | (atomic_inc(&(sbi)->inplace_count)) | ||
1557 | #define stat_inc_seg_count(sbi, type) \ | 1618 | #define stat_inc_seg_count(sbi, type) \ |
1558 | do { \ | 1619 | do { \ |
1559 | struct f2fs_stat_info *si = F2FS_STAT(sbi); \ | 1620 | struct f2fs_stat_info *si = F2FS_STAT(sbi); \ |
@@ -1599,6 +1660,7 @@ void f2fs_destroy_root_stats(void); | |||
1599 | #define stat_dec_inline_dir(inode) | 1660 | #define stat_dec_inline_dir(inode) |
1600 | #define stat_inc_seg_type(sbi, curseg) | 1661 | #define stat_inc_seg_type(sbi, curseg) |
1601 | #define stat_inc_block_count(sbi, curseg) | 1662 | #define stat_inc_block_count(sbi, curseg) |
1663 | #define stat_inc_inplace_blocks(sbi) | ||
1602 | #define stat_inc_seg_count(si, type) | 1664 | #define stat_inc_seg_count(si, type) |
1603 | #define stat_inc_tot_blk_count(si, blks) | 1665 | #define stat_inc_tot_blk_count(si, blks) |
1604 | #define stat_inc_data_blk_count(si, blks) | 1666 | #define stat_inc_data_blk_count(si, blks) |
@@ -1619,6 +1681,7 @@ extern const struct address_space_operations f2fs_meta_aops; | |||
1619 | extern const struct inode_operations f2fs_dir_inode_operations; | 1681 | extern const struct inode_operations f2fs_dir_inode_operations; |
1620 | extern const struct inode_operations f2fs_symlink_inode_operations; | 1682 | extern const struct inode_operations f2fs_symlink_inode_operations; |
1621 | extern const struct inode_operations f2fs_special_inode_operations; | 1683 | extern const struct inode_operations f2fs_special_inode_operations; |
1684 | extern struct kmem_cache *inode_entry_slab; | ||
1622 | 1685 | ||
1623 | /* | 1686 | /* |
1624 | * inline.c | 1687 | * inline.c |
@@ -1629,7 +1692,6 @@ int f2fs_read_inline_data(struct inode *, struct page *); | |||
1629 | int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); | 1692 | int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); |
1630 | int f2fs_convert_inline_inode(struct inode *); | 1693 | int f2fs_convert_inline_inode(struct inode *); |
1631 | int f2fs_write_inline_data(struct inode *, struct page *); | 1694 | int f2fs_write_inline_data(struct inode *, struct page *); |
1632 | void truncate_inline_data(struct page *, u64); | ||
1633 | bool recover_inline_data(struct inode *, struct page *); | 1695 | bool recover_inline_data(struct inode *, struct page *); |
1634 | struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, | 1696 | struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, |
1635 | struct page **); | 1697 | struct page **); |
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3c27e0ecb3bc..98dac27bc3f7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include "segment.h" | 26 | #include "segment.h" |
27 | #include "xattr.h" | 27 | #include "xattr.h" |
28 | #include "acl.h" | 28 | #include "acl.h" |
29 | #include "trace.h" | ||
29 | #include <trace/events/f2fs.h> | 30 | #include <trace/events/f2fs.h> |
30 | 31 | ||
31 | static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | 32 | static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, |
@@ -92,7 +93,6 @@ static const struct vm_operations_struct f2fs_file_vm_ops = { | |||
92 | .fault = filemap_fault, | 93 | .fault = filemap_fault, |
93 | .map_pages = filemap_map_pages, | 94 | .map_pages = filemap_map_pages, |
94 | .page_mkwrite = f2fs_vm_page_mkwrite, | 95 | .page_mkwrite = f2fs_vm_page_mkwrite, |
95 | .remap_pages = generic_file_remap_pages, | ||
96 | }; | 96 | }; |
97 | 97 | ||
98 | static int get_parent_ino(struct inode *inode, nid_t *pino) | 98 | static int get_parent_ino(struct inode *inode, nid_t *pino) |
@@ -246,6 +246,10 @@ go_write: | |||
246 | sync_nodes: | 246 | sync_nodes: |
247 | sync_node_pages(sbi, ino, &wbc); | 247 | sync_node_pages(sbi, ino, &wbc); |
248 | 248 | ||
249 | /* if cp_error was enabled, we should avoid infinite loop */ | ||
250 | if (unlikely(f2fs_cp_error(sbi))) | ||
251 | goto out; | ||
252 | |||
249 | if (need_inode_block_update(sbi, ino)) { | 253 | if (need_inode_block_update(sbi, ino)) { |
250 | mark_inode_dirty_sync(inode); | 254 | mark_inode_dirty_sync(inode); |
251 | f2fs_write_inode(inode, NULL); | 255 | f2fs_write_inode(inode, NULL); |
@@ -265,6 +269,7 @@ flush_out: | |||
265 | ret = f2fs_issue_flush(sbi); | 269 | ret = f2fs_issue_flush(sbi); |
266 | out: | 270 | out: |
267 | trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); | 271 | trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); |
272 | f2fs_trace_ios(NULL, NULL, 1); | ||
268 | return ret; | 273 | return ret; |
269 | } | 274 | } |
270 | 275 | ||
@@ -351,7 +356,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) | |||
351 | /* find data/hole in dnode block */ | 356 | /* find data/hole in dnode block */ |
352 | for (; dn.ofs_in_node < end_offset; | 357 | for (; dn.ofs_in_node < end_offset; |
353 | dn.ofs_in_node++, pgofs++, | 358 | dn.ofs_in_node++, pgofs++, |
354 | data_ofs = pgofs << PAGE_CACHE_SHIFT) { | 359 | data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) { |
355 | block_t blkaddr; | 360 | block_t blkaddr; |
356 | blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); | 361 | blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); |
357 | 362 | ||
@@ -427,7 +432,8 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) | |||
427 | if (blkaddr == NULL_ADDR) | 432 | if (blkaddr == NULL_ADDR) |
428 | continue; | 433 | continue; |
429 | 434 | ||
430 | update_extent_cache(NULL_ADDR, dn); | 435 | dn->data_blkaddr = NULL_ADDR; |
436 | update_extent_cache(dn); | ||
431 | invalidate_blocks(sbi, blkaddr); | 437 | invalidate_blocks(sbi, blkaddr); |
432 | nr_free++; | 438 | nr_free++; |
433 | } | 439 | } |
@@ -484,8 +490,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) | |||
484 | 490 | ||
485 | trace_f2fs_truncate_blocks_enter(inode, from); | 491 | trace_f2fs_truncate_blocks_enter(inode, from); |
486 | 492 | ||
487 | free_from = (pgoff_t) | 493 | free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1); |
488 | ((from + blocksize - 1) >> (sbi->log_blocksize)); | ||
489 | 494 | ||
490 | if (lock) | 495 | if (lock) |
491 | f2fs_lock_op(sbi); | 496 | f2fs_lock_op(sbi); |
@@ -836,6 +841,19 @@ static long f2fs_fallocate(struct file *file, int mode, | |||
836 | return ret; | 841 | return ret; |
837 | } | 842 | } |
838 | 843 | ||
844 | static int f2fs_release_file(struct inode *inode, struct file *filp) | ||
845 | { | ||
846 | /* some remained atomic pages should discarded */ | ||
847 | if (f2fs_is_atomic_file(inode)) | ||
848 | commit_inmem_pages(inode, true); | ||
849 | if (f2fs_is_volatile_file(inode)) { | ||
850 | set_inode_flag(F2FS_I(inode), FI_DROP_CACHE); | ||
851 | filemap_fdatawrite(inode->i_mapping); | ||
852 | clear_inode_flag(F2FS_I(inode), FI_DROP_CACHE); | ||
853 | } | ||
854 | return 0; | ||
855 | } | ||
856 | |||
839 | #define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) | 857 | #define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) |
840 | #define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) | 858 | #define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) |
841 | 859 | ||
@@ -906,29 +924,30 @@ out: | |||
906 | return ret; | 924 | return ret; |
907 | } | 925 | } |
908 | 926 | ||
927 | static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) | ||
928 | { | ||
929 | struct inode *inode = file_inode(filp); | ||
930 | |||
931 | return put_user(inode->i_generation, (int __user *)arg); | ||
932 | } | ||
933 | |||
909 | static int f2fs_ioc_start_atomic_write(struct file *filp) | 934 | static int f2fs_ioc_start_atomic_write(struct file *filp) |
910 | { | 935 | { |
911 | struct inode *inode = file_inode(filp); | 936 | struct inode *inode = file_inode(filp); |
912 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
913 | 937 | ||
914 | if (!inode_owner_or_capable(inode)) | 938 | if (!inode_owner_or_capable(inode)) |
915 | return -EACCES; | 939 | return -EACCES; |
916 | 940 | ||
917 | f2fs_balance_fs(sbi); | 941 | f2fs_balance_fs(F2FS_I_SB(inode)); |
942 | |||
943 | if (f2fs_is_atomic_file(inode)) | ||
944 | return 0; | ||
918 | 945 | ||
919 | set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); | 946 | set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); |
920 | 947 | ||
921 | return f2fs_convert_inline_inode(inode); | 948 | return f2fs_convert_inline_inode(inode); |
922 | } | 949 | } |
923 | 950 | ||
924 | static int f2fs_release_file(struct inode *inode, struct file *filp) | ||
925 | { | ||
926 | /* some remained atomic pages should discarded */ | ||
927 | if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) | ||
928 | commit_inmem_pages(inode, true); | ||
929 | return 0; | ||
930 | } | ||
931 | |||
932 | static int f2fs_ioc_commit_atomic_write(struct file *filp) | 951 | static int f2fs_ioc_commit_atomic_write(struct file *filp) |
933 | { | 952 | { |
934 | struct inode *inode = file_inode(filp); | 953 | struct inode *inode = file_inode(filp); |
@@ -949,6 +968,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) | |||
949 | 968 | ||
950 | ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); | 969 | ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); |
951 | mnt_drop_write_file(filp); | 970 | mnt_drop_write_file(filp); |
971 | clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); | ||
952 | return ret; | 972 | return ret; |
953 | } | 973 | } |
954 | 974 | ||
@@ -959,11 +979,56 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) | |||
959 | if (!inode_owner_or_capable(inode)) | 979 | if (!inode_owner_or_capable(inode)) |
960 | return -EACCES; | 980 | return -EACCES; |
961 | 981 | ||
982 | if (f2fs_is_volatile_file(inode)) | ||
983 | return 0; | ||
984 | |||
962 | set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); | 985 | set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); |
963 | 986 | ||
964 | return f2fs_convert_inline_inode(inode); | 987 | return f2fs_convert_inline_inode(inode); |
965 | } | 988 | } |
966 | 989 | ||
990 | static int f2fs_ioc_release_volatile_write(struct file *filp) | ||
991 | { | ||
992 | struct inode *inode = file_inode(filp); | ||
993 | |||
994 | if (!inode_owner_or_capable(inode)) | ||
995 | return -EACCES; | ||
996 | |||
997 | if (!f2fs_is_volatile_file(inode)) | ||
998 | return 0; | ||
999 | |||
1000 | punch_hole(inode, 0, F2FS_BLKSIZE); | ||
1001 | return 0; | ||
1002 | } | ||
1003 | |||
1004 | static int f2fs_ioc_abort_volatile_write(struct file *filp) | ||
1005 | { | ||
1006 | struct inode *inode = file_inode(filp); | ||
1007 | int ret; | ||
1008 | |||
1009 | if (!inode_owner_or_capable(inode)) | ||
1010 | return -EACCES; | ||
1011 | |||
1012 | ret = mnt_want_write_file(filp); | ||
1013 | if (ret) | ||
1014 | return ret; | ||
1015 | |||
1016 | f2fs_balance_fs(F2FS_I_SB(inode)); | ||
1017 | |||
1018 | if (f2fs_is_atomic_file(inode)) { | ||
1019 | commit_inmem_pages(inode, false); | ||
1020 | clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); | ||
1021 | } | ||
1022 | |||
1023 | if (f2fs_is_volatile_file(inode)) { | ||
1024 | clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); | ||
1025 | filemap_fdatawrite(inode->i_mapping); | ||
1026 | set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); | ||
1027 | } | ||
1028 | mnt_drop_write_file(filp); | ||
1029 | return ret; | ||
1030 | } | ||
1031 | |||
967 | static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) | 1032 | static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) |
968 | { | 1033 | { |
969 | struct inode *inode = file_inode(filp); | 1034 | struct inode *inode = file_inode(filp); |
@@ -1001,12 +1066,18 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
1001 | return f2fs_ioc_getflags(filp, arg); | 1066 | return f2fs_ioc_getflags(filp, arg); |
1002 | case F2FS_IOC_SETFLAGS: | 1067 | case F2FS_IOC_SETFLAGS: |
1003 | return f2fs_ioc_setflags(filp, arg); | 1068 | return f2fs_ioc_setflags(filp, arg); |
1069 | case F2FS_IOC_GETVERSION: | ||
1070 | return f2fs_ioc_getversion(filp, arg); | ||
1004 | case F2FS_IOC_START_ATOMIC_WRITE: | 1071 | case F2FS_IOC_START_ATOMIC_WRITE: |
1005 | return f2fs_ioc_start_atomic_write(filp); | 1072 | return f2fs_ioc_start_atomic_write(filp); |
1006 | case F2FS_IOC_COMMIT_ATOMIC_WRITE: | 1073 | case F2FS_IOC_COMMIT_ATOMIC_WRITE: |
1007 | return f2fs_ioc_commit_atomic_write(filp); | 1074 | return f2fs_ioc_commit_atomic_write(filp); |
1008 | case F2FS_IOC_START_VOLATILE_WRITE: | 1075 | case F2FS_IOC_START_VOLATILE_WRITE: |
1009 | return f2fs_ioc_start_volatile_write(filp); | 1076 | return f2fs_ioc_start_volatile_write(filp); |
1077 | case F2FS_IOC_RELEASE_VOLATILE_WRITE: | ||
1078 | return f2fs_ioc_release_volatile_write(filp); | ||
1079 | case F2FS_IOC_ABORT_VOLATILE_WRITE: | ||
1080 | return f2fs_ioc_abort_volatile_write(filp); | ||
1010 | case FITRIM: | 1081 | case FITRIM: |
1011 | return f2fs_ioc_fitrim(filp, arg); | 1082 | return f2fs_ioc_fitrim(filp, arg); |
1012 | default: | 1083 | default: |
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index eec0933a4819..76adbc3641f1 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c | |||
@@ -24,8 +24,6 @@ | |||
24 | #include "gc.h" | 24 | #include "gc.h" |
25 | #include <trace/events/f2fs.h> | 25 | #include <trace/events/f2fs.h> |
26 | 26 | ||
27 | static struct kmem_cache *winode_slab; | ||
28 | |||
29 | static int gc_thread_func(void *data) | 27 | static int gc_thread_func(void *data) |
30 | { | 28 | { |
31 | struct f2fs_sb_info *sbi = data; | 29 | struct f2fs_sb_info *sbi = data; |
@@ -46,7 +44,7 @@ static int gc_thread_func(void *data) | |||
46 | break; | 44 | break; |
47 | 45 | ||
48 | if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { | 46 | if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { |
49 | wait_ms = increase_sleep_time(gc_th, wait_ms); | 47 | increase_sleep_time(gc_th, &wait_ms); |
50 | continue; | 48 | continue; |
51 | } | 49 | } |
52 | 50 | ||
@@ -67,15 +65,15 @@ static int gc_thread_func(void *data) | |||
67 | continue; | 65 | continue; |
68 | 66 | ||
69 | if (!is_idle(sbi)) { | 67 | if (!is_idle(sbi)) { |
70 | wait_ms = increase_sleep_time(gc_th, wait_ms); | 68 | increase_sleep_time(gc_th, &wait_ms); |
71 | mutex_unlock(&sbi->gc_mutex); | 69 | mutex_unlock(&sbi->gc_mutex); |
72 | continue; | 70 | continue; |
73 | } | 71 | } |
74 | 72 | ||
75 | if (has_enough_invalid_blocks(sbi)) | 73 | if (has_enough_invalid_blocks(sbi)) |
76 | wait_ms = decrease_sleep_time(gc_th, wait_ms); | 74 | decrease_sleep_time(gc_th, &wait_ms); |
77 | else | 75 | else |
78 | wait_ms = increase_sleep_time(gc_th, wait_ms); | 76 | increase_sleep_time(gc_th, &wait_ms); |
79 | 77 | ||
80 | stat_inc_bggc_count(sbi); | 78 | stat_inc_bggc_count(sbi); |
81 | 79 | ||
@@ -356,13 +354,10 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) | |||
356 | iput(inode); | 354 | iput(inode); |
357 | return; | 355 | return; |
358 | } | 356 | } |
359 | new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); | 357 | new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); |
360 | new_ie->inode = inode; | 358 | new_ie->inode = inode; |
361 | retry: | 359 | |
362 | if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) { | 360 | f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie); |
363 | cond_resched(); | ||
364 | goto retry; | ||
365 | } | ||
366 | list_add_tail(&new_ie->list, &gc_list->ilist); | 361 | list_add_tail(&new_ie->list, &gc_list->ilist); |
367 | } | 362 | } |
368 | 363 | ||
@@ -373,7 +368,7 @@ static void put_gc_inode(struct gc_inode_list *gc_list) | |||
373 | radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); | 368 | radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); |
374 | iput(ie->inode); | 369 | iput(ie->inode); |
375 | list_del(&ie->list); | 370 | list_del(&ie->list); |
376 | kmem_cache_free(winode_slab, ie); | 371 | kmem_cache_free(inode_entry_slab, ie); |
377 | } | 372 | } |
378 | } | 373 | } |
379 | 374 | ||
@@ -703,8 +698,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) | |||
703 | .iroot = RADIX_TREE_INIT(GFP_NOFS), | 698 | .iroot = RADIX_TREE_INIT(GFP_NOFS), |
704 | }; | 699 | }; |
705 | 700 | ||
706 | cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC; | 701 | cpc.reason = __get_cp_reason(sbi); |
707 | |||
708 | gc_more: | 702 | gc_more: |
709 | if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) | 703 | if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) |
710 | goto stop; | 704 | goto stop; |
@@ -750,17 +744,3 @@ void build_gc_manager(struct f2fs_sb_info *sbi) | |||
750 | { | 744 | { |
751 | DIRTY_I(sbi)->v_ops = &default_v_ops; | 745 | DIRTY_I(sbi)->v_ops = &default_v_ops; |
752 | } | 746 | } |
753 | |||
754 | int __init create_gc_caches(void) | ||
755 | { | ||
756 | winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", | ||
757 | sizeof(struct inode_entry)); | ||
758 | if (!winode_slab) | ||
759 | return -ENOMEM; | ||
760 | return 0; | ||
761 | } | ||
762 | |||
763 | void destroy_gc_caches(void) | ||
764 | { | ||
765 | kmem_cache_destroy(winode_slab); | ||
766 | } | ||
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 6ff7ad38463e..b4a65be9f7d3 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h | |||
@@ -35,11 +35,6 @@ struct f2fs_gc_kthread { | |||
35 | unsigned int gc_idle; | 35 | unsigned int gc_idle; |
36 | }; | 36 | }; |
37 | 37 | ||
38 | struct inode_entry { | ||
39 | struct list_head list; | ||
40 | struct inode *inode; | ||
41 | }; | ||
42 | |||
43 | struct gc_inode_list { | 38 | struct gc_inode_list { |
44 | struct list_head ilist; | 39 | struct list_head ilist; |
45 | struct radix_tree_root iroot; | 40 | struct radix_tree_root iroot; |
@@ -69,26 +64,26 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) | |||
69 | return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; | 64 | return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; |
70 | } | 65 | } |
71 | 66 | ||
72 | static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) | 67 | static inline void increase_sleep_time(struct f2fs_gc_kthread *gc_th, |
68 | long *wait) | ||
73 | { | 69 | { |
74 | if (wait == gc_th->no_gc_sleep_time) | 70 | if (*wait == gc_th->no_gc_sleep_time) |
75 | return wait; | 71 | return; |
76 | 72 | ||
77 | wait += gc_th->min_sleep_time; | 73 | *wait += gc_th->min_sleep_time; |
78 | if (wait > gc_th->max_sleep_time) | 74 | if (*wait > gc_th->max_sleep_time) |
79 | wait = gc_th->max_sleep_time; | 75 | *wait = gc_th->max_sleep_time; |
80 | return wait; | ||
81 | } | 76 | } |
82 | 77 | ||
83 | static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) | 78 | static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th, |
79 | long *wait) | ||
84 | { | 80 | { |
85 | if (wait == gc_th->no_gc_sleep_time) | 81 | if (*wait == gc_th->no_gc_sleep_time) |
86 | wait = gc_th->max_sleep_time; | 82 | *wait = gc_th->max_sleep_time; |
87 | 83 | ||
88 | wait -= gc_th->min_sleep_time; | 84 | *wait -= gc_th->min_sleep_time; |
89 | if (wait <= gc_th->min_sleep_time) | 85 | if (*wait <= gc_th->min_sleep_time) |
90 | wait = gc_th->min_sleep_time; | 86 | *wait = gc_th->min_sleep_time; |
91 | return wait; | ||
92 | } | 87 | } |
93 | 88 | ||
94 | static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) | 89 | static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) |
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index f2d3c581e776..1484c00133cd 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c | |||
@@ -50,6 +50,12 @@ void read_inline_data(struct page *page, struct page *ipage) | |||
50 | SetPageUptodate(page); | 50 | SetPageUptodate(page); |
51 | } | 51 | } |
52 | 52 | ||
53 | static void truncate_inline_data(struct page *ipage) | ||
54 | { | ||
55 | f2fs_wait_on_page_writeback(ipage, NODE); | ||
56 | memset(inline_data_addr(ipage), 0, MAX_INLINE_DATA); | ||
57 | } | ||
58 | |||
53 | int f2fs_read_inline_data(struct inode *inode, struct page *page) | 59 | int f2fs_read_inline_data(struct inode *inode, struct page *page) |
54 | { | 60 | { |
55 | struct page *ipage; | 61 | struct page *ipage; |
@@ -79,7 +85,6 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) | |||
79 | int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) | 85 | int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) |
80 | { | 86 | { |
81 | void *src_addr, *dst_addr; | 87 | void *src_addr, *dst_addr; |
82 | block_t new_blk_addr; | ||
83 | struct f2fs_io_info fio = { | 88 | struct f2fs_io_info fio = { |
84 | .type = DATA, | 89 | .type = DATA, |
85 | .rw = WRITE_SYNC | REQ_PRIO, | 90 | .rw = WRITE_SYNC | REQ_PRIO, |
@@ -115,9 +120,9 @@ no_update: | |||
115 | 120 | ||
116 | /* write data page to try to make data consistent */ | 121 | /* write data page to try to make data consistent */ |
117 | set_page_writeback(page); | 122 | set_page_writeback(page); |
118 | 123 | fio.blk_addr = dn->data_blkaddr; | |
119 | write_data_page(page, dn, &new_blk_addr, &fio); | 124 | write_data_page(page, dn, &fio); |
120 | update_extent_cache(new_blk_addr, dn); | 125 | update_extent_cache(dn); |
121 | f2fs_wait_on_page_writeback(page, DATA); | 126 | f2fs_wait_on_page_writeback(page, DATA); |
122 | if (dirty) | 127 | if (dirty) |
123 | inode_dec_dirty_pages(dn->inode); | 128 | inode_dec_dirty_pages(dn->inode); |
@@ -126,7 +131,7 @@ no_update: | |||
126 | set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); | 131 | set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); |
127 | 132 | ||
128 | /* clear inline data and flag after data writeback */ | 133 | /* clear inline data and flag after data writeback */ |
129 | truncate_inline_data(dn->inode_page, 0); | 134 | truncate_inline_data(dn->inode_page); |
130 | clear_out: | 135 | clear_out: |
131 | stat_dec_inline_inode(dn->inode); | 136 | stat_dec_inline_inode(dn->inode); |
132 | f2fs_clear_inline_inode(dn->inode); | 137 | f2fs_clear_inline_inode(dn->inode); |
@@ -199,19 +204,6 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) | |||
199 | return 0; | 204 | return 0; |
200 | } | 205 | } |
201 | 206 | ||
202 | void truncate_inline_data(struct page *ipage, u64 from) | ||
203 | { | ||
204 | void *addr; | ||
205 | |||
206 | if (from >= MAX_INLINE_DATA) | ||
207 | return; | ||
208 | |||
209 | f2fs_wait_on_page_writeback(ipage, NODE); | ||
210 | |||
211 | addr = inline_data_addr(ipage); | ||
212 | memset(addr + from, 0, MAX_INLINE_DATA - from); | ||
213 | } | ||
214 | |||
215 | bool recover_inline_data(struct inode *inode, struct page *npage) | 207 | bool recover_inline_data(struct inode *inode, struct page *npage) |
216 | { | 208 | { |
217 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | 209 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
@@ -253,7 +245,7 @@ process_inline: | |||
253 | if (f2fs_has_inline_data(inode)) { | 245 | if (f2fs_has_inline_data(inode)) { |
254 | ipage = get_node_page(sbi, inode->i_ino); | 246 | ipage = get_node_page(sbi, inode->i_ino); |
255 | f2fs_bug_on(sbi, IS_ERR(ipage)); | 247 | f2fs_bug_on(sbi, IS_ERR(ipage)); |
256 | truncate_inline_data(ipage, 0); | 248 | truncate_inline_data(ipage); |
257 | f2fs_clear_inline_inode(inode); | 249 | f2fs_clear_inline_inode(inode); |
258 | update_inode(inode, ipage); | 250 | update_inode(inode, ipage); |
259 | f2fs_put_page(ipage, 1); | 251 | f2fs_put_page(ipage, 1); |
@@ -371,7 +363,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, | |||
371 | set_page_dirty(page); | 363 | set_page_dirty(page); |
372 | 364 | ||
373 | /* clear inline dir and flag after data writeback */ | 365 | /* clear inline dir and flag after data writeback */ |
374 | truncate_inline_data(ipage, 0); | 366 | truncate_inline_data(ipage); |
375 | 367 | ||
376 | stat_dec_inline_dir(dir); | 368 | stat_dec_inline_dir(dir); |
377 | clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); | 369 | clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); |
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 196cc7843aaf..2d002e3738a7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c | |||
@@ -67,29 +67,23 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) | |||
67 | } | 67 | } |
68 | } | 68 | } |
69 | 69 | ||
70 | static int __recover_inline_status(struct inode *inode, struct page *ipage) | 70 | static void __recover_inline_status(struct inode *inode, struct page *ipage) |
71 | { | 71 | { |
72 | void *inline_data = inline_data_addr(ipage); | 72 | void *inline_data = inline_data_addr(ipage); |
73 | struct f2fs_inode *ri; | 73 | __le32 *start = inline_data; |
74 | void *zbuf; | 74 | __le32 *end = start + MAX_INLINE_DATA / sizeof(__le32); |
75 | 75 | ||
76 | zbuf = kzalloc(MAX_INLINE_DATA, GFP_NOFS); | 76 | while (start < end) { |
77 | if (!zbuf) | 77 | if (*start++) { |
78 | return -ENOMEM; | 78 | f2fs_wait_on_page_writeback(ipage, NODE); |
79 | 79 | ||
80 | if (!memcmp(zbuf, inline_data, MAX_INLINE_DATA)) { | 80 | set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); |
81 | kfree(zbuf); | 81 | set_raw_inline(F2FS_I(inode), F2FS_INODE(ipage)); |
82 | return 0; | 82 | set_page_dirty(ipage); |
83 | return; | ||
84 | } | ||
83 | } | 85 | } |
84 | kfree(zbuf); | 86 | return; |
85 | |||
86 | f2fs_wait_on_page_writeback(ipage, NODE); | ||
87 | set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); | ||
88 | |||
89 | ri = F2FS_INODE(ipage); | ||
90 | set_raw_inline(F2FS_I(inode), ri); | ||
91 | set_page_dirty(ipage); | ||
92 | return 0; | ||
93 | } | 87 | } |
94 | 88 | ||
95 | static int do_read_inode(struct inode *inode) | 89 | static int do_read_inode(struct inode *inode) |
@@ -98,7 +92,6 @@ static int do_read_inode(struct inode *inode) | |||
98 | struct f2fs_inode_info *fi = F2FS_I(inode); | 92 | struct f2fs_inode_info *fi = F2FS_I(inode); |
99 | struct page *node_page; | 93 | struct page *node_page; |
100 | struct f2fs_inode *ri; | 94 | struct f2fs_inode *ri; |
101 | int err = 0; | ||
102 | 95 | ||
103 | /* Check if ino is within scope */ | 96 | /* Check if ino is within scope */ |
104 | if (check_nid_range(sbi, inode->i_ino)) { | 97 | if (check_nid_range(sbi, inode->i_ino)) { |
@@ -142,7 +135,7 @@ static int do_read_inode(struct inode *inode) | |||
142 | 135 | ||
143 | /* check data exist */ | 136 | /* check data exist */ |
144 | if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) | 137 | if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) |
145 | err = __recover_inline_status(inode, node_page); | 138 | __recover_inline_status(inode, node_page); |
146 | 139 | ||
147 | /* get rdev by using inline_info */ | 140 | /* get rdev by using inline_info */ |
148 | __get_inode_rdev(inode, ri); | 141 | __get_inode_rdev(inode, ri); |
@@ -152,7 +145,7 @@ static int do_read_inode(struct inode *inode) | |||
152 | stat_inc_inline_inode(inode); | 145 | stat_inc_inline_inode(inode); |
153 | stat_inc_inline_dir(inode); | 146 | stat_inc_inline_dir(inode); |
154 | 147 | ||
155 | return err; | 148 | return 0; |
156 | } | 149 | } |
157 | 150 | ||
158 | struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) | 151 | struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) |
@@ -304,7 +297,7 @@ void f2fs_evict_inode(struct inode *inode) | |||
304 | nid_t xnid = F2FS_I(inode)->i_xattr_nid; | 297 | nid_t xnid = F2FS_I(inode)->i_xattr_nid; |
305 | 298 | ||
306 | /* some remained atomic pages should discarded */ | 299 | /* some remained atomic pages should discarded */ |
307 | if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) | 300 | if (f2fs_is_atomic_file(inode)) |
308 | commit_inmem_pages(inode, true); | 301 | commit_inmem_pages(inode, true); |
309 | 302 | ||
310 | trace_f2fs_evict_inode(inode); | 303 | trace_f2fs_evict_inode(inode); |
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 547a2deeb1ac..e79639a9787a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c | |||
@@ -299,7 +299,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
299 | inode->i_op = &f2fs_dir_inode_operations; | 299 | inode->i_op = &f2fs_dir_inode_operations; |
300 | inode->i_fop = &f2fs_dir_operations; | 300 | inode->i_fop = &f2fs_dir_operations; |
301 | inode->i_mapping->a_ops = &f2fs_dblock_aops; | 301 | inode->i_mapping->a_ops = &f2fs_dblock_aops; |
302 | mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); | 302 | mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); |
303 | 303 | ||
304 | set_inode_flag(F2FS_I(inode), FI_INC_LINK); | 304 | set_inode_flag(F2FS_I(inode), FI_INC_LINK); |
305 | f2fs_lock_op(sbi); | 305 | f2fs_lock_op(sbi); |
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f83326ca32ef..97bd9d3db882 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include "f2fs.h" | 19 | #include "f2fs.h" |
20 | #include "node.h" | 20 | #include "node.h" |
21 | #include "segment.h" | 21 | #include "segment.h" |
22 | #include "trace.h" | ||
22 | #include <trace/events/f2fs.h> | 23 | #include <trace/events/f2fs.h> |
23 | 24 | ||
24 | #define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock) | 25 | #define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock) |
@@ -57,12 +58,13 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) | |||
57 | } else if (type == INO_ENTRIES) { | 58 | } else if (type == INO_ENTRIES) { |
58 | int i; | 59 | int i; |
59 | 60 | ||
60 | if (sbi->sb->s_bdi->dirty_exceeded) | ||
61 | return false; | ||
62 | for (i = 0; i <= UPDATE_INO; i++) | 61 | for (i = 0; i <= UPDATE_INO; i++) |
63 | mem_size += (sbi->im[i].ino_num * | 62 | mem_size += (sbi->im[i].ino_num * |
64 | sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; | 63 | sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; |
65 | res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); | 64 | res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); |
65 | } else { | ||
66 | if (sbi->sb->s_bdi->dirty_exceeded) | ||
67 | return false; | ||
66 | } | 68 | } |
67 | return res; | 69 | return res; |
68 | } | 70 | } |
@@ -268,7 +270,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, | |||
268 | e = __lookup_nat_cache(nm_i, ni->nid); | 270 | e = __lookup_nat_cache(nm_i, ni->nid); |
269 | if (!e) { | 271 | if (!e) { |
270 | e = grab_nat_entry(nm_i, ni->nid); | 272 | e = grab_nat_entry(nm_i, ni->nid); |
271 | e->ni = *ni; | 273 | copy_node_info(&e->ni, ni); |
272 | f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); | 274 | f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); |
273 | } else if (new_blkaddr == NEW_ADDR) { | 275 | } else if (new_blkaddr == NEW_ADDR) { |
274 | /* | 276 | /* |
@@ -276,7 +278,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, | |||
276 | * previous nat entry can be remained in nat cache. | 278 | * previous nat entry can be remained in nat cache. |
277 | * So, reinitialize it with new information. | 279 | * So, reinitialize it with new information. |
278 | */ | 280 | */ |
279 | e->ni = *ni; | 281 | copy_node_info(&e->ni, ni); |
280 | f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); | 282 | f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); |
281 | } | 283 | } |
282 | 284 | ||
@@ -346,7 +348,6 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) | |||
346 | struct nat_entry *e; | 348 | struct nat_entry *e; |
347 | int i; | 349 | int i; |
348 | 350 | ||
349 | memset(&ne, 0, sizeof(struct f2fs_nat_entry)); | ||
350 | ni->nid = nid; | 351 | ni->nid = nid; |
351 | 352 | ||
352 | /* Check nat cache */ | 353 | /* Check nat cache */ |
@@ -361,6 +362,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) | |||
361 | if (e) | 362 | if (e) |
362 | return; | 363 | return; |
363 | 364 | ||
365 | memset(&ne, 0, sizeof(struct f2fs_nat_entry)); | ||
366 | |||
364 | /* Check current segment summary */ | 367 | /* Check current segment summary */ |
365 | mutex_lock(&curseg->curseg_mutex); | 368 | mutex_lock(&curseg->curseg_mutex); |
366 | i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); | 369 | i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); |
@@ -471,7 +474,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) | |||
471 | { | 474 | { |
472 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); | 475 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
473 | struct page *npage[4]; | 476 | struct page *npage[4]; |
474 | struct page *parent; | 477 | struct page *parent = NULL; |
475 | int offset[4]; | 478 | int offset[4]; |
476 | unsigned int noffset[4]; | 479 | unsigned int noffset[4]; |
477 | nid_t nids[4]; | 480 | nid_t nids[4]; |
@@ -488,6 +491,14 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) | |||
488 | if (IS_ERR(npage[0])) | 491 | if (IS_ERR(npage[0])) |
489 | return PTR_ERR(npage[0]); | 492 | return PTR_ERR(npage[0]); |
490 | } | 493 | } |
494 | |||
495 | /* if inline_data is set, should not report any block indices */ | ||
496 | if (f2fs_has_inline_data(dn->inode) && index) { | ||
497 | err = -EINVAL; | ||
498 | f2fs_put_page(npage[0], 1); | ||
499 | goto release_out; | ||
500 | } | ||
501 | |||
491 | parent = npage[0]; | 502 | parent = npage[0]; |
492 | if (level != 0) | 503 | if (level != 0) |
493 | nids[1] = get_nid(parent, offset[0], true); | 504 | nids[1] = get_nid(parent, offset[0], true); |
@@ -585,7 +596,7 @@ static void truncate_node(struct dnode_of_data *dn) | |||
585 | } | 596 | } |
586 | invalidate: | 597 | invalidate: |
587 | clear_node_page_dirty(dn->node_page); | 598 | clear_node_page_dirty(dn->node_page); |
588 | F2FS_SET_SB_DIRT(sbi); | 599 | set_sbi_flag(sbi, SBI_IS_DIRTY); |
589 | 600 | ||
590 | f2fs_put_page(dn->node_page, 1); | 601 | f2fs_put_page(dn->node_page, 1); |
591 | 602 | ||
@@ -976,6 +987,10 @@ static int read_node_page(struct page *page, int rw) | |||
976 | { | 987 | { |
977 | struct f2fs_sb_info *sbi = F2FS_P_SB(page); | 988 | struct f2fs_sb_info *sbi = F2FS_P_SB(page); |
978 | struct node_info ni; | 989 | struct node_info ni; |
990 | struct f2fs_io_info fio = { | ||
991 | .type = NODE, | ||
992 | .rw = rw, | ||
993 | }; | ||
979 | 994 | ||
980 | get_node_info(sbi, page->index, &ni); | 995 | get_node_info(sbi, page->index, &ni); |
981 | 996 | ||
@@ -987,7 +1002,8 @@ static int read_node_page(struct page *page, int rw) | |||
987 | if (PageUptodate(page)) | 1002 | if (PageUptodate(page)) |
988 | return LOCKED_PAGE; | 1003 | return LOCKED_PAGE; |
989 | 1004 | ||
990 | return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw); | 1005 | fio.blk_addr = ni.blk_addr; |
1006 | return f2fs_submit_page_bio(sbi, page, &fio); | ||
991 | } | 1007 | } |
992 | 1008 | ||
993 | /* | 1009 | /* |
@@ -1028,11 +1044,11 @@ repeat: | |||
1028 | err = read_node_page(page, READ_SYNC); | 1044 | err = read_node_page(page, READ_SYNC); |
1029 | if (err < 0) | 1045 | if (err < 0) |
1030 | return ERR_PTR(err); | 1046 | return ERR_PTR(err); |
1031 | else if (err == LOCKED_PAGE) | 1047 | else if (err != LOCKED_PAGE) |
1032 | goto got_it; | 1048 | lock_page(page); |
1033 | 1049 | ||
1034 | lock_page(page); | ||
1035 | if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { | 1050 | if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { |
1051 | ClearPageUptodate(page); | ||
1036 | f2fs_put_page(page, 1); | 1052 | f2fs_put_page(page, 1); |
1037 | return ERR_PTR(-EIO); | 1053 | return ERR_PTR(-EIO); |
1038 | } | 1054 | } |
@@ -1040,7 +1056,6 @@ repeat: | |||
1040 | f2fs_put_page(page, 1); | 1056 | f2fs_put_page(page, 1); |
1041 | goto repeat; | 1057 | goto repeat; |
1042 | } | 1058 | } |
1043 | got_it: | ||
1044 | return page; | 1059 | return page; |
1045 | } | 1060 | } |
1046 | 1061 | ||
@@ -1268,7 +1283,6 @@ static int f2fs_write_node_page(struct page *page, | |||
1268 | { | 1283 | { |
1269 | struct f2fs_sb_info *sbi = F2FS_P_SB(page); | 1284 | struct f2fs_sb_info *sbi = F2FS_P_SB(page); |
1270 | nid_t nid; | 1285 | nid_t nid; |
1271 | block_t new_addr; | ||
1272 | struct node_info ni; | 1286 | struct node_info ni; |
1273 | struct f2fs_io_info fio = { | 1287 | struct f2fs_io_info fio = { |
1274 | .type = NODE, | 1288 | .type = NODE, |
@@ -1277,7 +1291,7 @@ static int f2fs_write_node_page(struct page *page, | |||
1277 | 1291 | ||
1278 | trace_f2fs_writepage(page, NODE); | 1292 | trace_f2fs_writepage(page, NODE); |
1279 | 1293 | ||
1280 | if (unlikely(sbi->por_doing)) | 1294 | if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) |
1281 | goto redirty_out; | 1295 | goto redirty_out; |
1282 | if (unlikely(f2fs_cp_error(sbi))) | 1296 | if (unlikely(f2fs_cp_error(sbi))) |
1283 | goto redirty_out; | 1297 | goto redirty_out; |
@@ -1303,9 +1317,11 @@ static int f2fs_write_node_page(struct page *page, | |||
1303 | } else { | 1317 | } else { |
1304 | down_read(&sbi->node_write); | 1318 | down_read(&sbi->node_write); |
1305 | } | 1319 | } |
1320 | |||
1306 | set_page_writeback(page); | 1321 | set_page_writeback(page); |
1307 | write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); | 1322 | fio.blk_addr = ni.blk_addr; |
1308 | set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); | 1323 | write_node_page(sbi, page, nid, &fio); |
1324 | set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page)); | ||
1309 | dec_page_count(sbi, F2FS_DIRTY_NODES); | 1325 | dec_page_count(sbi, F2FS_DIRTY_NODES); |
1310 | up_read(&sbi->node_write); | 1326 | up_read(&sbi->node_write); |
1311 | unlock_page(page); | 1327 | unlock_page(page); |
@@ -1355,26 +1371,12 @@ static int f2fs_set_node_page_dirty(struct page *page) | |||
1355 | __set_page_dirty_nobuffers(page); | 1371 | __set_page_dirty_nobuffers(page); |
1356 | inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); | 1372 | inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); |
1357 | SetPagePrivate(page); | 1373 | SetPagePrivate(page); |
1374 | f2fs_trace_pid(page); | ||
1358 | return 1; | 1375 | return 1; |
1359 | } | 1376 | } |
1360 | return 0; | 1377 | return 0; |
1361 | } | 1378 | } |
1362 | 1379 | ||
1363 | static void f2fs_invalidate_node_page(struct page *page, unsigned int offset, | ||
1364 | unsigned int length) | ||
1365 | { | ||
1366 | struct inode *inode = page->mapping->host; | ||
1367 | if (PageDirty(page)) | ||
1368 | dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_NODES); | ||
1369 | ClearPagePrivate(page); | ||
1370 | } | ||
1371 | |||
1372 | static int f2fs_release_node_page(struct page *page, gfp_t wait) | ||
1373 | { | ||
1374 | ClearPagePrivate(page); | ||
1375 | return 1; | ||
1376 | } | ||
1377 | |||
1378 | /* | 1380 | /* |
1379 | * Structure of the f2fs node operations | 1381 | * Structure of the f2fs node operations |
1380 | */ | 1382 | */ |
@@ -1382,8 +1384,8 @@ const struct address_space_operations f2fs_node_aops = { | |||
1382 | .writepage = f2fs_write_node_page, | 1384 | .writepage = f2fs_write_node_page, |
1383 | .writepages = f2fs_write_node_pages, | 1385 | .writepages = f2fs_write_node_pages, |
1384 | .set_page_dirty = f2fs_set_node_page_dirty, | 1386 | .set_page_dirty = f2fs_set_node_page_dirty, |
1385 | .invalidatepage = f2fs_invalidate_node_page, | 1387 | .invalidatepage = f2fs_invalidate_page, |
1386 | .releasepage = f2fs_release_node_page, | 1388 | .releasepage = f2fs_release_page, |
1387 | }; | 1389 | }; |
1388 | 1390 | ||
1389 | static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, | 1391 | static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, |
@@ -1726,80 +1728,41 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) | |||
1726 | return 0; | 1728 | return 0; |
1727 | } | 1729 | } |
1728 | 1730 | ||
1729 | /* | ||
1730 | * ra_sum_pages() merge contiguous pages into one bio and submit. | ||
1731 | * these pre-read pages are allocated in bd_inode's mapping tree. | ||
1732 | */ | ||
1733 | static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, | ||
1734 | int start, int nrpages) | ||
1735 | { | ||
1736 | struct inode *inode = sbi->sb->s_bdev->bd_inode; | ||
1737 | struct address_space *mapping = inode->i_mapping; | ||
1738 | int i, page_idx = start; | ||
1739 | struct f2fs_io_info fio = { | ||
1740 | .type = META, | ||
1741 | .rw = READ_SYNC | REQ_META | REQ_PRIO | ||
1742 | }; | ||
1743 | |||
1744 | for (i = 0; page_idx < start + nrpages; page_idx++, i++) { | ||
1745 | /* alloc page in bd_inode for reading node summary info */ | ||
1746 | pages[i] = grab_cache_page(mapping, page_idx); | ||
1747 | if (!pages[i]) | ||
1748 | break; | ||
1749 | f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio); | ||
1750 | } | ||
1751 | |||
1752 | f2fs_submit_merged_bio(sbi, META, READ); | ||
1753 | return i; | ||
1754 | } | ||
1755 | |||
1756 | int restore_node_summary(struct f2fs_sb_info *sbi, | 1731 | int restore_node_summary(struct f2fs_sb_info *sbi, |
1757 | unsigned int segno, struct f2fs_summary_block *sum) | 1732 | unsigned int segno, struct f2fs_summary_block *sum) |
1758 | { | 1733 | { |
1759 | struct f2fs_node *rn; | 1734 | struct f2fs_node *rn; |
1760 | struct f2fs_summary *sum_entry; | 1735 | struct f2fs_summary *sum_entry; |
1761 | struct inode *inode = sbi->sb->s_bdev->bd_inode; | ||
1762 | block_t addr; | 1736 | block_t addr; |
1763 | int bio_blocks = MAX_BIO_BLOCKS(sbi); | 1737 | int bio_blocks = MAX_BIO_BLOCKS(sbi); |
1764 | struct page *pages[bio_blocks]; | 1738 | int i, idx, last_offset, nrpages; |
1765 | int i, idx, last_offset, nrpages, err = 0; | ||
1766 | 1739 | ||
1767 | /* scan the node segment */ | 1740 | /* scan the node segment */ |
1768 | last_offset = sbi->blocks_per_seg; | 1741 | last_offset = sbi->blocks_per_seg; |
1769 | addr = START_BLOCK(sbi, segno); | 1742 | addr = START_BLOCK(sbi, segno); |
1770 | sum_entry = &sum->entries[0]; | 1743 | sum_entry = &sum->entries[0]; |
1771 | 1744 | ||
1772 | for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) { | 1745 | for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { |
1773 | nrpages = min(last_offset - i, bio_blocks); | 1746 | nrpages = min(last_offset - i, bio_blocks); |
1774 | 1747 | ||
1775 | /* readahead node pages */ | 1748 | /* readahead node pages */ |
1776 | nrpages = ra_sum_pages(sbi, pages, addr, nrpages); | 1749 | ra_meta_pages(sbi, addr, nrpages, META_POR); |
1777 | if (!nrpages) | ||
1778 | return -ENOMEM; | ||
1779 | 1750 | ||
1780 | for (idx = 0; idx < nrpages; idx++) { | 1751 | for (idx = addr; idx < addr + nrpages; idx++) { |
1781 | if (err) | 1752 | struct page *page = get_meta_page(sbi, idx); |
1782 | goto skip; | ||
1783 | 1753 | ||
1784 | lock_page(pages[idx]); | 1754 | rn = F2FS_NODE(page); |
1785 | if (unlikely(!PageUptodate(pages[idx]))) { | 1755 | sum_entry->nid = rn->footer.nid; |
1786 | err = -EIO; | 1756 | sum_entry->version = 0; |
1787 | } else { | 1757 | sum_entry->ofs_in_node = 0; |
1788 | rn = F2FS_NODE(pages[idx]); | 1758 | sum_entry++; |
1789 | sum_entry->nid = rn->footer.nid; | 1759 | f2fs_put_page(page, 1); |
1790 | sum_entry->version = 0; | ||
1791 | sum_entry->ofs_in_node = 0; | ||
1792 | sum_entry++; | ||
1793 | } | ||
1794 | unlock_page(pages[idx]); | ||
1795 | skip: | ||
1796 | page_cache_release(pages[idx]); | ||
1797 | } | 1760 | } |
1798 | 1761 | ||
1799 | invalidate_mapping_pages(inode->i_mapping, addr, | 1762 | invalidate_mapping_pages(META_MAPPING(sbi), addr, |
1800 | addr + nrpages); | 1763 | addr + nrpages); |
1801 | } | 1764 | } |
1802 | return err; | 1765 | return 0; |
1803 | } | 1766 | } |
1804 | 1767 | ||
1805 | static void remove_nats_in_journal(struct f2fs_sb_info *sbi) | 1768 | static void remove_nats_in_journal(struct f2fs_sb_info *sbi) |
@@ -1923,7 +1886,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) | |||
1923 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 1886 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
1924 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | 1887 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); |
1925 | struct f2fs_summary_block *sum = curseg->sum_blk; | 1888 | struct f2fs_summary_block *sum = curseg->sum_blk; |
1926 | struct nat_entry_set *setvec[NATVEC_SIZE]; | 1889 | struct nat_entry_set *setvec[SETVEC_SIZE]; |
1927 | struct nat_entry_set *set, *tmp; | 1890 | struct nat_entry_set *set, *tmp; |
1928 | unsigned int found; | 1891 | unsigned int found; |
1929 | nid_t set_idx = 0; | 1892 | nid_t set_idx = 0; |
@@ -1940,7 +1903,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) | |||
1940 | remove_nats_in_journal(sbi); | 1903 | remove_nats_in_journal(sbi); |
1941 | 1904 | ||
1942 | while ((found = __gang_lookup_nat_set(nm_i, | 1905 | while ((found = __gang_lookup_nat_set(nm_i, |
1943 | set_idx, NATVEC_SIZE, setvec))) { | 1906 | set_idx, SETVEC_SIZE, setvec))) { |
1944 | unsigned idx; | 1907 | unsigned idx; |
1945 | set_idx = setvec[found - 1]->set + 1; | 1908 | set_idx = setvec[found - 1]->set + 1; |
1946 | for (idx = 0; idx < found; idx++) | 1909 | for (idx = 0; idx < found; idx++) |
@@ -2020,6 +1983,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) | |||
2020 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 1983 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
2021 | struct free_nid *i, *next_i; | 1984 | struct free_nid *i, *next_i; |
2022 | struct nat_entry *natvec[NATVEC_SIZE]; | 1985 | struct nat_entry *natvec[NATVEC_SIZE]; |
1986 | struct nat_entry_set *setvec[SETVEC_SIZE]; | ||
2023 | nid_t nid = 0; | 1987 | nid_t nid = 0; |
2024 | unsigned int found; | 1988 | unsigned int found; |
2025 | 1989 | ||
@@ -2044,11 +2008,27 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) | |||
2044 | while ((found = __gang_lookup_nat_cache(nm_i, | 2008 | while ((found = __gang_lookup_nat_cache(nm_i, |
2045 | nid, NATVEC_SIZE, natvec))) { | 2009 | nid, NATVEC_SIZE, natvec))) { |
2046 | unsigned idx; | 2010 | unsigned idx; |
2011 | |||
2047 | nid = nat_get_nid(natvec[found - 1]) + 1; | 2012 | nid = nat_get_nid(natvec[found - 1]) + 1; |
2048 | for (idx = 0; idx < found; idx++) | 2013 | for (idx = 0; idx < found; idx++) |
2049 | __del_from_nat_cache(nm_i, natvec[idx]); | 2014 | __del_from_nat_cache(nm_i, natvec[idx]); |
2050 | } | 2015 | } |
2051 | f2fs_bug_on(sbi, nm_i->nat_cnt); | 2016 | f2fs_bug_on(sbi, nm_i->nat_cnt); |
2017 | |||
2018 | /* destroy nat set cache */ | ||
2019 | nid = 0; | ||
2020 | while ((found = __gang_lookup_nat_set(nm_i, | ||
2021 | nid, SETVEC_SIZE, setvec))) { | ||
2022 | unsigned idx; | ||
2023 | |||
2024 | nid = setvec[found - 1]->set + 1; | ||
2025 | for (idx = 0; idx < found; idx++) { | ||
2026 | /* entry_cnt is not zero, when cp_error was occurred */ | ||
2027 | f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list)); | ||
2028 | radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set); | ||
2029 | kmem_cache_free(nat_entry_set_slab, setvec[idx]); | ||
2030 | } | ||
2031 | } | ||
2052 | up_write(&nm_i->nat_tree_lock); | 2032 | up_write(&nm_i->nat_tree_lock); |
2053 | 2033 | ||
2054 | kfree(nm_i->nat_bitmap); | 2034 | kfree(nm_i->nat_bitmap); |
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index d10b6448a671..f405bbf2435a 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h | |||
@@ -25,10 +25,19 @@ | |||
25 | 25 | ||
26 | /* vector size for gang look-up from nat cache that consists of radix tree */ | 26 | /* vector size for gang look-up from nat cache that consists of radix tree */ |
27 | #define NATVEC_SIZE 64 | 27 | #define NATVEC_SIZE 64 |
28 | #define SETVEC_SIZE 32 | ||
28 | 29 | ||
29 | /* return value for read_node_page */ | 30 | /* return value for read_node_page */ |
30 | #define LOCKED_PAGE 1 | 31 | #define LOCKED_PAGE 1 |
31 | 32 | ||
33 | /* For flag in struct node_info */ | ||
34 | enum { | ||
35 | IS_CHECKPOINTED, /* is it checkpointed before? */ | ||
36 | HAS_FSYNCED_INODE, /* is the inode fsynced before? */ | ||
37 | HAS_LAST_FSYNC, /* has the latest node fsync mark? */ | ||
38 | IS_DIRTY, /* this nat entry is dirty? */ | ||
39 | }; | ||
40 | |||
32 | /* | 41 | /* |
33 | * For node information | 42 | * For node information |
34 | */ | 43 | */ |
@@ -37,18 +46,11 @@ struct node_info { | |||
37 | nid_t ino; /* inode number of the node's owner */ | 46 | nid_t ino; /* inode number of the node's owner */ |
38 | block_t blk_addr; /* block address of the node */ | 47 | block_t blk_addr; /* block address of the node */ |
39 | unsigned char version; /* version of the node */ | 48 | unsigned char version; /* version of the node */ |
40 | }; | 49 | unsigned char flag; /* for node information bits */ |
41 | |||
42 | enum { | ||
43 | IS_CHECKPOINTED, /* is it checkpointed before? */ | ||
44 | HAS_FSYNCED_INODE, /* is the inode fsynced before? */ | ||
45 | HAS_LAST_FSYNC, /* has the latest node fsync mark? */ | ||
46 | IS_DIRTY, /* this nat entry is dirty? */ | ||
47 | }; | 50 | }; |
48 | 51 | ||
49 | struct nat_entry { | 52 | struct nat_entry { |
50 | struct list_head list; /* for clean or dirty nat list */ | 53 | struct list_head list; /* for clean or dirty nat list */ |
51 | unsigned char flag; /* for node information bits */ | ||
52 | struct node_info ni; /* in-memory node information */ | 54 | struct node_info ni; /* in-memory node information */ |
53 | }; | 55 | }; |
54 | 56 | ||
@@ -63,20 +65,30 @@ struct nat_entry { | |||
63 | 65 | ||
64 | #define inc_node_version(version) (++version) | 66 | #define inc_node_version(version) (++version) |
65 | 67 | ||
68 | static inline void copy_node_info(struct node_info *dst, | ||
69 | struct node_info *src) | ||
70 | { | ||
71 | dst->nid = src->nid; | ||
72 | dst->ino = src->ino; | ||
73 | dst->blk_addr = src->blk_addr; | ||
74 | dst->version = src->version; | ||
75 | /* should not copy flag here */ | ||
76 | } | ||
77 | |||
66 | static inline void set_nat_flag(struct nat_entry *ne, | 78 | static inline void set_nat_flag(struct nat_entry *ne, |
67 | unsigned int type, bool set) | 79 | unsigned int type, bool set) |
68 | { | 80 | { |
69 | unsigned char mask = 0x01 << type; | 81 | unsigned char mask = 0x01 << type; |
70 | if (set) | 82 | if (set) |
71 | ne->flag |= mask; | 83 | ne->ni.flag |= mask; |
72 | else | 84 | else |
73 | ne->flag &= ~mask; | 85 | ne->ni.flag &= ~mask; |
74 | } | 86 | } |
75 | 87 | ||
76 | static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type) | 88 | static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type) |
77 | { | 89 | { |
78 | unsigned char mask = 0x01 << type; | 90 | unsigned char mask = 0x01 << type; |
79 | return ne->flag & mask; | 91 | return ne->ni.flag & mask; |
80 | } | 92 | } |
81 | 93 | ||
82 | static inline void nat_reset_flag(struct nat_entry *ne) | 94 | static inline void nat_reset_flag(struct nat_entry *ne) |
@@ -108,6 +120,7 @@ enum mem_type { | |||
108 | NAT_ENTRIES, /* indicates the cached nat entry */ | 120 | NAT_ENTRIES, /* indicates the cached nat entry */ |
109 | DIRTY_DENTS, /* indicates dirty dentry pages */ | 121 | DIRTY_DENTS, /* indicates dirty dentry pages */ |
110 | INO_ENTRIES, /* indicates inode entries */ | 122 | INO_ENTRIES, /* indicates inode entries */ |
123 | BASE_CHECK, /* check kernel status */ | ||
111 | }; | 124 | }; |
112 | 125 | ||
113 | struct nat_entry_set { | 126 | struct nat_entry_set { |
@@ -200,11 +213,19 @@ static inline void fill_node_footer(struct page *page, nid_t nid, | |||
200 | nid_t ino, unsigned int ofs, bool reset) | 213 | nid_t ino, unsigned int ofs, bool reset) |
201 | { | 214 | { |
202 | struct f2fs_node *rn = F2FS_NODE(page); | 215 | struct f2fs_node *rn = F2FS_NODE(page); |
216 | unsigned int old_flag = 0; | ||
217 | |||
203 | if (reset) | 218 | if (reset) |
204 | memset(rn, 0, sizeof(*rn)); | 219 | memset(rn, 0, sizeof(*rn)); |
220 | else | ||
221 | old_flag = le32_to_cpu(rn->footer.flag); | ||
222 | |||
205 | rn->footer.nid = cpu_to_le32(nid); | 223 | rn->footer.nid = cpu_to_le32(nid); |
206 | rn->footer.ino = cpu_to_le32(ino); | 224 | rn->footer.ino = cpu_to_le32(ino); |
207 | rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT); | 225 | |
226 | /* should remain old flag bits such as COLD_BIT_SHIFT */ | ||
227 | rn->footer.flag = cpu_to_le32((ofs << OFFSET_BIT_SHIFT) | | ||
228 | (old_flag & OFFSET_BIT_MASK)); | ||
208 | } | 229 | } |
209 | 230 | ||
210 | static inline void copy_node_footer(struct page *dst, struct page *src) | 231 | static inline void copy_node_footer(struct page *dst, struct page *src) |
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9160a37e1c7a..41afb9534bbd 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c | |||
@@ -346,6 +346,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
346 | if (IS_INODE(page)) { | 346 | if (IS_INODE(page)) { |
347 | recover_inline_xattr(inode, page); | 347 | recover_inline_xattr(inode, page); |
348 | } else if (f2fs_has_xattr_block(ofs_of_node(page))) { | 348 | } else if (f2fs_has_xattr_block(ofs_of_node(page))) { |
349 | /* | ||
350 | * Deprecated; xattr blocks should be found from cold log. | ||
351 | * But, we should remain this for backward compatibility. | ||
352 | */ | ||
349 | recover_xattr_data(inode, page, blkaddr); | 353 | recover_xattr_data(inode, page, blkaddr); |
350 | goto out; | 354 | goto out; |
351 | } | 355 | } |
@@ -396,7 +400,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
396 | 400 | ||
397 | /* write dummy data page */ | 401 | /* write dummy data page */ |
398 | recover_data_page(sbi, NULL, &sum, src, dest); | 402 | recover_data_page(sbi, NULL, &sum, src, dest); |
399 | update_extent_cache(dest, &dn); | 403 | dn.data_blkaddr = dest; |
404 | update_extent_cache(&dn); | ||
400 | recovered++; | 405 | recovered++; |
401 | } | 406 | } |
402 | dn.ofs_in_node++; | 407 | dn.ofs_in_node++; |
@@ -503,7 +508,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) | |||
503 | INIT_LIST_HEAD(&inode_list); | 508 | INIT_LIST_HEAD(&inode_list); |
504 | 509 | ||
505 | /* step #1: find fsynced inode numbers */ | 510 | /* step #1: find fsynced inode numbers */ |
506 | sbi->por_doing = true; | 511 | set_sbi_flag(sbi, SBI_POR_DOING); |
507 | 512 | ||
508 | /* prevent checkpoint */ | 513 | /* prevent checkpoint */ |
509 | mutex_lock(&sbi->cp_mutex); | 514 | mutex_lock(&sbi->cp_mutex); |
@@ -536,7 +541,7 @@ out: | |||
536 | truncate_inode_pages_final(META_MAPPING(sbi)); | 541 | truncate_inode_pages_final(META_MAPPING(sbi)); |
537 | } | 542 | } |
538 | 543 | ||
539 | sbi->por_doing = false; | 544 | clear_sbi_flag(sbi, SBI_POR_DOING); |
540 | if (err) { | 545 | if (err) { |
541 | discard_next_dnode(sbi, blkaddr); | 546 | discard_next_dnode(sbi, blkaddr); |
542 | 547 | ||
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 42607a679923..daee4ab913da 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include "f2fs.h" | 20 | #include "f2fs.h" |
21 | #include "segment.h" | 21 | #include "segment.h" |
22 | #include "node.h" | 22 | #include "node.h" |
23 | #include "trace.h" | ||
23 | #include <trace/events/f2fs.h> | 24 | #include <trace/events/f2fs.h> |
24 | 25 | ||
25 | #define __reverse_ffz(x) __reverse_ffs(~(x)) | 26 | #define __reverse_ffz(x) __reverse_ffs(~(x)) |
@@ -181,6 +182,7 @@ void register_inmem_page(struct inode *inode, struct page *page) | |||
181 | int err; | 182 | int err; |
182 | 183 | ||
183 | SetPagePrivate(page); | 184 | SetPagePrivate(page); |
185 | f2fs_trace_pid(page); | ||
184 | 186 | ||
185 | new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); | 187 | new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); |
186 | 188 | ||
@@ -205,23 +207,6 @@ retry: | |||
205 | mutex_unlock(&fi->inmem_lock); | 207 | mutex_unlock(&fi->inmem_lock); |
206 | } | 208 | } |
207 | 209 | ||
208 | void invalidate_inmem_page(struct inode *inode, struct page *page) | ||
209 | { | ||
210 | struct f2fs_inode_info *fi = F2FS_I(inode); | ||
211 | struct inmem_pages *cur; | ||
212 | |||
213 | mutex_lock(&fi->inmem_lock); | ||
214 | cur = radix_tree_lookup(&fi->inmem_root, page->index); | ||
215 | if (cur) { | ||
216 | radix_tree_delete(&fi->inmem_root, cur->page->index); | ||
217 | f2fs_put_page(cur->page, 0); | ||
218 | list_del(&cur->list); | ||
219 | kmem_cache_free(inmem_entry_slab, cur); | ||
220 | dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); | ||
221 | } | ||
222 | mutex_unlock(&fi->inmem_lock); | ||
223 | } | ||
224 | |||
225 | void commit_inmem_pages(struct inode *inode, bool abort) | 210 | void commit_inmem_pages(struct inode *inode, bool abort) |
226 | { | 211 | { |
227 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | 212 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
@@ -230,7 +215,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) | |||
230 | bool submit_bio = false; | 215 | bool submit_bio = false; |
231 | struct f2fs_io_info fio = { | 216 | struct f2fs_io_info fio = { |
232 | .type = DATA, | 217 | .type = DATA, |
233 | .rw = WRITE_SYNC, | 218 | .rw = WRITE_SYNC | REQ_PRIO, |
234 | }; | 219 | }; |
235 | 220 | ||
236 | /* | 221 | /* |
@@ -240,33 +225,38 @@ void commit_inmem_pages(struct inode *inode, bool abort) | |||
240 | * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this | 225 | * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this |
241 | * inode becomes free by iget_locked in f2fs_iget. | 226 | * inode becomes free by iget_locked in f2fs_iget. |
242 | */ | 227 | */ |
243 | if (!abort) | 228 | if (!abort) { |
244 | f2fs_balance_fs(sbi); | 229 | f2fs_balance_fs(sbi); |
245 | 230 | f2fs_lock_op(sbi); | |
246 | f2fs_lock_op(sbi); | 231 | } |
247 | 232 | ||
248 | mutex_lock(&fi->inmem_lock); | 233 | mutex_lock(&fi->inmem_lock); |
249 | list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { | 234 | list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { |
250 | lock_page(cur->page); | 235 | if (!abort) { |
251 | if (!abort && cur->page->mapping == inode->i_mapping) { | 236 | lock_page(cur->page); |
252 | f2fs_wait_on_page_writeback(cur->page, DATA); | 237 | if (cur->page->mapping == inode->i_mapping) { |
253 | if (clear_page_dirty_for_io(cur->page)) | 238 | f2fs_wait_on_page_writeback(cur->page, DATA); |
254 | inode_dec_dirty_pages(inode); | 239 | if (clear_page_dirty_for_io(cur->page)) |
255 | do_write_data_page(cur->page, &fio); | 240 | inode_dec_dirty_pages(inode); |
256 | submit_bio = true; | 241 | do_write_data_page(cur->page, &fio); |
242 | submit_bio = true; | ||
243 | } | ||
244 | f2fs_put_page(cur->page, 1); | ||
245 | } else { | ||
246 | put_page(cur->page); | ||
257 | } | 247 | } |
258 | radix_tree_delete(&fi->inmem_root, cur->page->index); | 248 | radix_tree_delete(&fi->inmem_root, cur->page->index); |
259 | f2fs_put_page(cur->page, 1); | ||
260 | list_del(&cur->list); | 249 | list_del(&cur->list); |
261 | kmem_cache_free(inmem_entry_slab, cur); | 250 | kmem_cache_free(inmem_entry_slab, cur); |
262 | dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); | 251 | dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); |
263 | } | 252 | } |
264 | if (submit_bio) | ||
265 | f2fs_submit_merged_bio(sbi, DATA, WRITE); | ||
266 | mutex_unlock(&fi->inmem_lock); | 253 | mutex_unlock(&fi->inmem_lock); |
267 | 254 | ||
268 | filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX); | 255 | if (!abort) { |
269 | f2fs_unlock_op(sbi); | 256 | f2fs_unlock_op(sbi); |
257 | if (submit_bio) | ||
258 | f2fs_submit_merged_bio(sbi, DATA, WRITE); | ||
259 | } | ||
270 | } | 260 | } |
271 | 261 | ||
272 | /* | 262 | /* |
@@ -290,7 +280,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) | |||
290 | /* check the # of cached NAT entries and prefree segments */ | 280 | /* check the # of cached NAT entries and prefree segments */ |
291 | if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || | 281 | if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || |
292 | excess_prefree_segs(sbi) || | 282 | excess_prefree_segs(sbi) || |
293 | available_free_memory(sbi, INO_ENTRIES)) | 283 | !available_free_memory(sbi, INO_ENTRIES)) |
294 | f2fs_sync_fs(sbi->sb, true); | 284 | f2fs_sync_fs(sbi->sb, true); |
295 | } | 285 | } |
296 | 286 | ||
@@ -515,12 +505,13 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
515 | struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); | 505 | struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); |
516 | unsigned long *cur_map = (unsigned long *)se->cur_valid_map; | 506 | unsigned long *cur_map = (unsigned long *)se->cur_valid_map; |
517 | unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; | 507 | unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; |
518 | unsigned long dmap[entries]; | 508 | unsigned long *dmap = SIT_I(sbi)->tmp_map; |
519 | unsigned int start = 0, end = -1; | 509 | unsigned int start = 0, end = -1; |
520 | bool force = (cpc->reason == CP_DISCARD); | 510 | bool force = (cpc->reason == CP_DISCARD); |
521 | int i; | 511 | int i; |
522 | 512 | ||
523 | if (!force && !test_opt(sbi, DISCARD)) | 513 | if (!force && (!test_opt(sbi, DISCARD) || |
514 | SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)) | ||
524 | return; | 515 | return; |
525 | 516 | ||
526 | if (force && !se->valid_blocks) { | 517 | if (force && !se->valid_blocks) { |
@@ -548,7 +539,8 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
548 | 539 | ||
549 | /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ | 540 | /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ |
550 | for (i = 0; i < entries; i++) | 541 | for (i = 0; i < entries; i++) |
551 | dmap[i] = ~(cur_map[i] | ckpt_map[i]); | 542 | dmap[i] = force ? ~ckpt_map[i] : |
543 | (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; | ||
552 | 544 | ||
553 | while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { | 545 | while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { |
554 | start = __find_rev_next_bit(dmap, max_blocks, end + 1); | 546 | start = __find_rev_next_bit(dmap, max_blocks, end + 1); |
@@ -735,7 +727,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, | |||
735 | /* | 727 | /* |
736 | * Calculate the number of current summary pages for writing | 728 | * Calculate the number of current summary pages for writing |
737 | */ | 729 | */ |
738 | int npages_for_summary_flush(struct f2fs_sb_info *sbi) | 730 | int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) |
739 | { | 731 | { |
740 | int valid_sum_count = 0; | 732 | int valid_sum_count = 0; |
741 | int i, sum_in_page; | 733 | int i, sum_in_page; |
@@ -743,8 +735,13 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi) | |||
743 | for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { | 735 | for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { |
744 | if (sbi->ckpt->alloc_type[i] == SSR) | 736 | if (sbi->ckpt->alloc_type[i] == SSR) |
745 | valid_sum_count += sbi->blocks_per_seg; | 737 | valid_sum_count += sbi->blocks_per_seg; |
746 | else | 738 | else { |
747 | valid_sum_count += curseg_blkoff(sbi, i); | 739 | if (for_ra) |
740 | valid_sum_count += le16_to_cpu( | ||
741 | F2FS_CKPT(sbi)->cur_data_blkoff[i]); | ||
742 | else | ||
743 | valid_sum_count += curseg_blkoff(sbi, i); | ||
744 | } | ||
748 | } | 745 | } |
749 | 746 | ||
750 | sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE - | 747 | sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE - |
@@ -803,7 +800,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi, | |||
803 | int go_left = 0; | 800 | int go_left = 0; |
804 | int i; | 801 | int i; |
805 | 802 | ||
806 | write_lock(&free_i->segmap_lock); | 803 | spin_lock(&free_i->segmap_lock); |
807 | 804 | ||
808 | if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { | 805 | if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { |
809 | segno = find_next_zero_bit(free_i->free_segmap, | 806 | segno = find_next_zero_bit(free_i->free_segmap, |
@@ -876,7 +873,7 @@ got_it: | |||
876 | f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); | 873 | f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); |
877 | __set_inuse(sbi, segno); | 874 | __set_inuse(sbi, segno); |
878 | *newseg = segno; | 875 | *newseg = segno; |
879 | write_unlock(&free_i->segmap_lock); | 876 | spin_unlock(&free_i->segmap_lock); |
880 | } | 877 | } |
881 | 878 | ||
882 | static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) | 879 | static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) |
@@ -927,7 +924,7 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi, | |||
927 | { | 924 | { |
928 | struct seg_entry *se = get_seg_entry(sbi, seg->segno); | 925 | struct seg_entry *se = get_seg_entry(sbi, seg->segno); |
929 | int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); | 926 | int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); |
930 | unsigned long target_map[entries]; | 927 | unsigned long *target_map = SIT_I(sbi)->tmp_map; |
931 | unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; | 928 | unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; |
932 | unsigned long *cur_map = (unsigned long *)se->cur_valid_map; | 929 | unsigned long *cur_map = (unsigned long *)se->cur_valid_map; |
933 | int i, pos; | 930 | int i, pos; |
@@ -1027,18 +1024,22 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, | |||
1027 | stat_inc_seg_type(sbi, curseg); | 1024 | stat_inc_seg_type(sbi, curseg); |
1028 | } | 1025 | } |
1029 | 1026 | ||
1027 | static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type) | ||
1028 | { | ||
1029 | struct curseg_info *curseg = CURSEG_I(sbi, type); | ||
1030 | unsigned int old_segno; | ||
1031 | |||
1032 | old_segno = curseg->segno; | ||
1033 | SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); | ||
1034 | locate_dirty_segment(sbi, old_segno); | ||
1035 | } | ||
1036 | |||
1030 | void allocate_new_segments(struct f2fs_sb_info *sbi) | 1037 | void allocate_new_segments(struct f2fs_sb_info *sbi) |
1031 | { | 1038 | { |
1032 | struct curseg_info *curseg; | ||
1033 | unsigned int old_curseg; | ||
1034 | int i; | 1039 | int i; |
1035 | 1040 | ||
1036 | for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { | 1041 | for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) |
1037 | curseg = CURSEG_I(sbi, i); | 1042 | __allocate_new_segments(sbi, i); |
1038 | old_curseg = curseg->segno; | ||
1039 | SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); | ||
1040 | locate_dirty_segment(sbi, old_curseg); | ||
1041 | } | ||
1042 | } | 1043 | } |
1043 | 1044 | ||
1044 | static const struct segment_allocation default_salloc_ops = { | 1045 | static const struct segment_allocation default_salloc_ops = { |
@@ -1047,8 +1048,8 @@ static const struct segment_allocation default_salloc_ops = { | |||
1047 | 1048 | ||
1048 | int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) | 1049 | int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) |
1049 | { | 1050 | { |
1050 | __u64 start = range->start >> sbi->log_blocksize; | 1051 | __u64 start = F2FS_BYTES_TO_BLK(range->start); |
1051 | __u64 end = start + (range->len >> sbi->log_blocksize) - 1; | 1052 | __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; |
1052 | unsigned int start_segno, end_segno; | 1053 | unsigned int start_segno, end_segno; |
1053 | struct cp_control cpc; | 1054 | struct cp_control cpc; |
1054 | 1055 | ||
@@ -1065,16 +1066,21 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) | |||
1065 | end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : | 1066 | end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : |
1066 | GET_SEGNO(sbi, end); | 1067 | GET_SEGNO(sbi, end); |
1067 | cpc.reason = CP_DISCARD; | 1068 | cpc.reason = CP_DISCARD; |
1068 | cpc.trim_start = start_segno; | 1069 | cpc.trim_minlen = F2FS_BYTES_TO_BLK(range->minlen); |
1069 | cpc.trim_end = end_segno; | ||
1070 | cpc.trim_minlen = range->minlen >> sbi->log_blocksize; | ||
1071 | 1070 | ||
1072 | /* do checkpoint to issue discard commands safely */ | 1071 | /* do checkpoint to issue discard commands safely */ |
1073 | mutex_lock(&sbi->gc_mutex); | 1072 | for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { |
1074 | write_checkpoint(sbi, &cpc); | 1073 | cpc.trim_start = start_segno; |
1075 | mutex_unlock(&sbi->gc_mutex); | 1074 | cpc.trim_end = min_t(unsigned int, rounddown(start_segno + |
1075 | BATCHED_TRIM_SEGMENTS(sbi), | ||
1076 | sbi->segs_per_sec) - 1, end_segno); | ||
1077 | |||
1078 | mutex_lock(&sbi->gc_mutex); | ||
1079 | write_checkpoint(sbi, &cpc); | ||
1080 | mutex_unlock(&sbi->gc_mutex); | ||
1081 | } | ||
1076 | out: | 1082 | out: |
1077 | range->len = cpc.trimmed << sbi->log_blocksize; | 1083 | range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); |
1078 | return 0; | 1084 | return 0; |
1079 | } | 1085 | } |
1080 | 1086 | ||
@@ -1151,11 +1157,18 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, | |||
1151 | { | 1157 | { |
1152 | struct sit_info *sit_i = SIT_I(sbi); | 1158 | struct sit_info *sit_i = SIT_I(sbi); |
1153 | struct curseg_info *curseg; | 1159 | struct curseg_info *curseg; |
1160 | bool direct_io = (type == CURSEG_DIRECT_IO); | ||
1161 | |||
1162 | type = direct_io ? CURSEG_WARM_DATA : type; | ||
1154 | 1163 | ||
1155 | curseg = CURSEG_I(sbi, type); | 1164 | curseg = CURSEG_I(sbi, type); |
1156 | 1165 | ||
1157 | mutex_lock(&curseg->curseg_mutex); | 1166 | mutex_lock(&curseg->curseg_mutex); |
1158 | 1167 | ||
1168 | /* direct_io'ed data is aligned to the segment for better performance */ | ||
1169 | if (direct_io && curseg->next_blkoff) | ||
1170 | __allocate_new_segments(sbi, type); | ||
1171 | |||
1159 | *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); | 1172 | *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); |
1160 | 1173 | ||
1161 | /* | 1174 | /* |
@@ -1187,39 +1200,39 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, | |||
1187 | } | 1200 | } |
1188 | 1201 | ||
1189 | static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, | 1202 | static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, |
1190 | block_t old_blkaddr, block_t *new_blkaddr, | 1203 | struct f2fs_summary *sum, |
1191 | struct f2fs_summary *sum, struct f2fs_io_info *fio) | 1204 | struct f2fs_io_info *fio) |
1192 | { | 1205 | { |
1193 | int type = __get_segment_type(page, fio->type); | 1206 | int type = __get_segment_type(page, fio->type); |
1194 | 1207 | ||
1195 | allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type); | 1208 | allocate_data_block(sbi, page, fio->blk_addr, &fio->blk_addr, sum, type); |
1196 | 1209 | ||
1197 | /* writeout dirty page into bdev */ | 1210 | /* writeout dirty page into bdev */ |
1198 | f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio); | 1211 | f2fs_submit_page_mbio(sbi, page, fio); |
1199 | } | 1212 | } |
1200 | 1213 | ||
1201 | void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) | 1214 | void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) |
1202 | { | 1215 | { |
1203 | struct f2fs_io_info fio = { | 1216 | struct f2fs_io_info fio = { |
1204 | .type = META, | 1217 | .type = META, |
1205 | .rw = WRITE_SYNC | REQ_META | REQ_PRIO | 1218 | .rw = WRITE_SYNC | REQ_META | REQ_PRIO, |
1219 | .blk_addr = page->index, | ||
1206 | }; | 1220 | }; |
1207 | 1221 | ||
1208 | set_page_writeback(page); | 1222 | set_page_writeback(page); |
1209 | f2fs_submit_page_mbio(sbi, page, page->index, &fio); | 1223 | f2fs_submit_page_mbio(sbi, page, &fio); |
1210 | } | 1224 | } |
1211 | 1225 | ||
1212 | void write_node_page(struct f2fs_sb_info *sbi, struct page *page, | 1226 | void write_node_page(struct f2fs_sb_info *sbi, struct page *page, |
1213 | struct f2fs_io_info *fio, | 1227 | unsigned int nid, struct f2fs_io_info *fio) |
1214 | unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) | ||
1215 | { | 1228 | { |
1216 | struct f2fs_summary sum; | 1229 | struct f2fs_summary sum; |
1217 | set_summary(&sum, nid, 0, 0); | 1230 | set_summary(&sum, nid, 0, 0); |
1218 | do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio); | 1231 | do_write_page(sbi, page, &sum, fio); |
1219 | } | 1232 | } |
1220 | 1233 | ||
1221 | void write_data_page(struct page *page, struct dnode_of_data *dn, | 1234 | void write_data_page(struct page *page, struct dnode_of_data *dn, |
1222 | block_t *new_blkaddr, struct f2fs_io_info *fio) | 1235 | struct f2fs_io_info *fio) |
1223 | { | 1236 | { |
1224 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); | 1237 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
1225 | struct f2fs_summary sum; | 1238 | struct f2fs_summary sum; |
@@ -1228,14 +1241,14 @@ void write_data_page(struct page *page, struct dnode_of_data *dn, | |||
1228 | f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); | 1241 | f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); |
1229 | get_node_info(sbi, dn->nid, &ni); | 1242 | get_node_info(sbi, dn->nid, &ni); |
1230 | set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); | 1243 | set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); |
1231 | 1244 | do_write_page(sbi, page, &sum, fio); | |
1232 | do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio); | 1245 | dn->data_blkaddr = fio->blk_addr; |
1233 | } | 1246 | } |
1234 | 1247 | ||
1235 | void rewrite_data_page(struct page *page, block_t old_blkaddr, | 1248 | void rewrite_data_page(struct page *page, struct f2fs_io_info *fio) |
1236 | struct f2fs_io_info *fio) | ||
1237 | { | 1249 | { |
1238 | f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio); | 1250 | stat_inc_inplace_blocks(F2FS_P_SB(page)); |
1251 | f2fs_submit_page_mbio(F2FS_P_SB(page), page, fio); | ||
1239 | } | 1252 | } |
1240 | 1253 | ||
1241 | void recover_data_page(struct f2fs_sb_info *sbi, | 1254 | void recover_data_page(struct f2fs_sb_info *sbi, |
@@ -1393,7 +1406,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) | |||
1393 | segno = le32_to_cpu(ckpt->cur_data_segno[type]); | 1406 | segno = le32_to_cpu(ckpt->cur_data_segno[type]); |
1394 | blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - | 1407 | blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - |
1395 | CURSEG_HOT_DATA]); | 1408 | CURSEG_HOT_DATA]); |
1396 | if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) | 1409 | if (__exist_node_summaries(sbi)) |
1397 | blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); | 1410 | blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); |
1398 | else | 1411 | else |
1399 | blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); | 1412 | blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); |
@@ -1402,7 +1415,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) | |||
1402 | CURSEG_HOT_NODE]); | 1415 | CURSEG_HOT_NODE]); |
1403 | blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - | 1416 | blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - |
1404 | CURSEG_HOT_NODE]); | 1417 | CURSEG_HOT_NODE]); |
1405 | if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) | 1418 | if (__exist_node_summaries(sbi)) |
1406 | blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, | 1419 | blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, |
1407 | type - CURSEG_HOT_NODE); | 1420 | type - CURSEG_HOT_NODE); |
1408 | else | 1421 | else |
@@ -1413,7 +1426,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) | |||
1413 | sum = (struct f2fs_summary_block *)page_address(new); | 1426 | sum = (struct f2fs_summary_block *)page_address(new); |
1414 | 1427 | ||
1415 | if (IS_NODESEG(type)) { | 1428 | if (IS_NODESEG(type)) { |
1416 | if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) { | 1429 | if (__exist_node_summaries(sbi)) { |
1417 | struct f2fs_summary *ns = &sum->entries[0]; | 1430 | struct f2fs_summary *ns = &sum->entries[0]; |
1418 | int i; | 1431 | int i; |
1419 | for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { | 1432 | for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { |
@@ -1450,12 +1463,22 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) | |||
1450 | int err; | 1463 | int err; |
1451 | 1464 | ||
1452 | if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { | 1465 | if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { |
1466 | int npages = npages_for_summary_flush(sbi, true); | ||
1467 | |||
1468 | if (npages >= 2) | ||
1469 | ra_meta_pages(sbi, start_sum_block(sbi), npages, | ||
1470 | META_CP); | ||
1471 | |||
1453 | /* restore for compacted data summary */ | 1472 | /* restore for compacted data summary */ |
1454 | if (read_compacted_summaries(sbi)) | 1473 | if (read_compacted_summaries(sbi)) |
1455 | return -EINVAL; | 1474 | return -EINVAL; |
1456 | type = CURSEG_HOT_NODE; | 1475 | type = CURSEG_HOT_NODE; |
1457 | } | 1476 | } |
1458 | 1477 | ||
1478 | if (__exist_node_summaries(sbi)) | ||
1479 | ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), | ||
1480 | NR_CURSEG_TYPE - type, META_CP); | ||
1481 | |||
1459 | for (; type <= CURSEG_COLD_NODE; type++) { | 1482 | for (; type <= CURSEG_COLD_NODE; type++) { |
1460 | err = read_normal_summaries(sbi, type); | 1483 | err = read_normal_summaries(sbi, type); |
1461 | if (err) | 1484 | if (err) |
@@ -1549,8 +1572,7 @@ void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) | |||
1549 | 1572 | ||
1550 | void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) | 1573 | void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) |
1551 | { | 1574 | { |
1552 | if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) | 1575 | write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); |
1553 | write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); | ||
1554 | } | 1576 | } |
1555 | 1577 | ||
1556 | int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, | 1578 | int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, |
@@ -1754,7 +1776,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
1754 | se = get_seg_entry(sbi, segno); | 1776 | se = get_seg_entry(sbi, segno); |
1755 | 1777 | ||
1756 | /* add discard candidates */ | 1778 | /* add discard candidates */ |
1757 | if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) { | 1779 | if (cpc->reason != CP_DISCARD) { |
1758 | cpc->trim_start = segno; | 1780 | cpc->trim_start = segno; |
1759 | add_discard_addrs(sbi, cpc); | 1781 | add_discard_addrs(sbi, cpc); |
1760 | } | 1782 | } |
@@ -1833,6 +1855,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi) | |||
1833 | return -ENOMEM; | 1855 | return -ENOMEM; |
1834 | } | 1856 | } |
1835 | 1857 | ||
1858 | sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); | ||
1859 | if (!sit_i->tmp_map) | ||
1860 | return -ENOMEM; | ||
1861 | |||
1836 | if (sbi->segs_per_sec > 1) { | 1862 | if (sbi->segs_per_sec > 1) { |
1837 | sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) * | 1863 | sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) * |
1838 | sizeof(struct sec_entry)); | 1864 | sizeof(struct sec_entry)); |
@@ -1897,7 +1923,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) | |||
1897 | free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); | 1923 | free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); |
1898 | free_i->free_segments = 0; | 1924 | free_i->free_segments = 0; |
1899 | free_i->free_sections = 0; | 1925 | free_i->free_sections = 0; |
1900 | rwlock_init(&free_i->segmap_lock); | 1926 | spin_lock_init(&free_i->segmap_lock); |
1901 | return 0; | 1927 | return 0; |
1902 | } | 1928 | } |
1903 | 1929 | ||
@@ -2110,6 +2136,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi) | |||
2110 | sm_info->nr_discards = 0; | 2136 | sm_info->nr_discards = 0; |
2111 | sm_info->max_discards = 0; | 2137 | sm_info->max_discards = 0; |
2112 | 2138 | ||
2139 | sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; | ||
2140 | |||
2113 | INIT_LIST_HEAD(&sm_info->sit_entry_set); | 2141 | INIT_LIST_HEAD(&sm_info->sit_entry_set); |
2114 | 2142 | ||
2115 | if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { | 2143 | if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { |
@@ -2212,6 +2240,8 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) | |||
2212 | kfree(sit_i->sentries[start].ckpt_valid_map); | 2240 | kfree(sit_i->sentries[start].ckpt_valid_map); |
2213 | } | 2241 | } |
2214 | } | 2242 | } |
2243 | kfree(sit_i->tmp_map); | ||
2244 | |||
2215 | vfree(sit_i->sentries); | 2245 | vfree(sit_i->sentries); |
2216 | vfree(sit_i->sec_entries); | 2246 | vfree(sit_i->sec_entries); |
2217 | kfree(sit_i->dirty_sentries_bitmap); | 2247 | kfree(sit_i->dirty_sentries_bitmap); |
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 7f327c0ba4e3..7fd35111cf62 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h | |||
@@ -189,6 +189,7 @@ struct sit_info { | |||
189 | char *sit_bitmap; /* SIT bitmap pointer */ | 189 | char *sit_bitmap; /* SIT bitmap pointer */ |
190 | unsigned int bitmap_size; /* SIT bitmap size */ | 190 | unsigned int bitmap_size; /* SIT bitmap size */ |
191 | 191 | ||
192 | unsigned long *tmp_map; /* bitmap for temporal use */ | ||
192 | unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ | 193 | unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ |
193 | unsigned int dirty_sentries; /* # of dirty sentries */ | 194 | unsigned int dirty_sentries; /* # of dirty sentries */ |
194 | unsigned int sents_per_block; /* # of SIT entries per block */ | 195 | unsigned int sents_per_block; /* # of SIT entries per block */ |
@@ -207,7 +208,7 @@ struct free_segmap_info { | |||
207 | unsigned int start_segno; /* start segment number logically */ | 208 | unsigned int start_segno; /* start segment number logically */ |
208 | unsigned int free_segments; /* # of free segments */ | 209 | unsigned int free_segments; /* # of free segments */ |
209 | unsigned int free_sections; /* # of free sections */ | 210 | unsigned int free_sections; /* # of free sections */ |
210 | rwlock_t segmap_lock; /* free segmap lock */ | 211 | spinlock_t segmap_lock; /* free segmap lock */ |
211 | unsigned long *free_segmap; /* free segment bitmap */ | 212 | unsigned long *free_segmap; /* free segment bitmap */ |
212 | unsigned long *free_secmap; /* free section bitmap */ | 213 | unsigned long *free_secmap; /* free section bitmap */ |
213 | }; | 214 | }; |
@@ -318,9 +319,9 @@ static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, | |||
318 | unsigned int max, unsigned int segno) | 319 | unsigned int max, unsigned int segno) |
319 | { | 320 | { |
320 | unsigned int ret; | 321 | unsigned int ret; |
321 | read_lock(&free_i->segmap_lock); | 322 | spin_lock(&free_i->segmap_lock); |
322 | ret = find_next_bit(free_i->free_segmap, max, segno); | 323 | ret = find_next_bit(free_i->free_segmap, max, segno); |
323 | read_unlock(&free_i->segmap_lock); | 324 | spin_unlock(&free_i->segmap_lock); |
324 | return ret; | 325 | return ret; |
325 | } | 326 | } |
326 | 327 | ||
@@ -331,7 +332,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) | |||
331 | unsigned int start_segno = secno * sbi->segs_per_sec; | 332 | unsigned int start_segno = secno * sbi->segs_per_sec; |
332 | unsigned int next; | 333 | unsigned int next; |
333 | 334 | ||
334 | write_lock(&free_i->segmap_lock); | 335 | spin_lock(&free_i->segmap_lock); |
335 | clear_bit(segno, free_i->free_segmap); | 336 | clear_bit(segno, free_i->free_segmap); |
336 | free_i->free_segments++; | 337 | free_i->free_segments++; |
337 | 338 | ||
@@ -340,7 +341,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) | |||
340 | clear_bit(secno, free_i->free_secmap); | 341 | clear_bit(secno, free_i->free_secmap); |
341 | free_i->free_sections++; | 342 | free_i->free_sections++; |
342 | } | 343 | } |
343 | write_unlock(&free_i->segmap_lock); | 344 | spin_unlock(&free_i->segmap_lock); |
344 | } | 345 | } |
345 | 346 | ||
346 | static inline void __set_inuse(struct f2fs_sb_info *sbi, | 347 | static inline void __set_inuse(struct f2fs_sb_info *sbi, |
@@ -362,7 +363,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, | |||
362 | unsigned int start_segno = secno * sbi->segs_per_sec; | 363 | unsigned int start_segno = secno * sbi->segs_per_sec; |
363 | unsigned int next; | 364 | unsigned int next; |
364 | 365 | ||
365 | write_lock(&free_i->segmap_lock); | 366 | spin_lock(&free_i->segmap_lock); |
366 | if (test_and_clear_bit(segno, free_i->free_segmap)) { | 367 | if (test_and_clear_bit(segno, free_i->free_segmap)) { |
367 | free_i->free_segments++; | 368 | free_i->free_segments++; |
368 | 369 | ||
@@ -373,7 +374,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, | |||
373 | free_i->free_sections++; | 374 | free_i->free_sections++; |
374 | } | 375 | } |
375 | } | 376 | } |
376 | write_unlock(&free_i->segmap_lock); | 377 | spin_unlock(&free_i->segmap_lock); |
377 | } | 378 | } |
378 | 379 | ||
379 | static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, | 380 | static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, |
@@ -381,13 +382,13 @@ static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, | |||
381 | { | 382 | { |
382 | struct free_segmap_info *free_i = FREE_I(sbi); | 383 | struct free_segmap_info *free_i = FREE_I(sbi); |
383 | unsigned int secno = segno / sbi->segs_per_sec; | 384 | unsigned int secno = segno / sbi->segs_per_sec; |
384 | write_lock(&free_i->segmap_lock); | 385 | spin_lock(&free_i->segmap_lock); |
385 | if (!test_and_set_bit(segno, free_i->free_segmap)) { | 386 | if (!test_and_set_bit(segno, free_i->free_segmap)) { |
386 | free_i->free_segments--; | 387 | free_i->free_segments--; |
387 | if (!test_and_set_bit(secno, free_i->free_secmap)) | 388 | if (!test_and_set_bit(secno, free_i->free_secmap)) |
388 | free_i->free_sections--; | 389 | free_i->free_sections--; |
389 | } | 390 | } |
390 | write_unlock(&free_i->segmap_lock); | 391 | spin_unlock(&free_i->segmap_lock); |
391 | } | 392 | } |
392 | 393 | ||
393 | static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, | 394 | static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, |
@@ -460,7 +461,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) | |||
460 | int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); | 461 | int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); |
461 | int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); | 462 | int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); |
462 | 463 | ||
463 | if (unlikely(sbi->por_doing)) | 464 | if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) |
464 | return false; | 465 | return false; |
465 | 466 | ||
466 | return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + | 467 | return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + |
@@ -599,13 +600,13 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, | |||
599 | static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) | 600 | static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) |
600 | { | 601 | { |
601 | if (segno > TOTAL_SEGS(sbi) - 1) | 602 | if (segno > TOTAL_SEGS(sbi) - 1) |
602 | sbi->need_fsck = true; | 603 | set_sbi_flag(sbi, SBI_NEED_FSCK); |
603 | } | 604 | } |
604 | 605 | ||
605 | static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) | 606 | static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) |
606 | { | 607 | { |
607 | if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi)) | 608 | if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi)) |
608 | sbi->need_fsck = true; | 609 | set_sbi_flag(sbi, SBI_NEED_FSCK); |
609 | } | 610 | } |
610 | 611 | ||
611 | /* | 612 | /* |
@@ -616,11 +617,11 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, | |||
616 | { | 617 | { |
617 | /* check segment usage */ | 618 | /* check segment usage */ |
618 | if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg) | 619 | if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg) |
619 | sbi->need_fsck = true; | 620 | set_sbi_flag(sbi, SBI_NEED_FSCK); |
620 | 621 | ||
621 | /* check boundary of a given segment number */ | 622 | /* check boundary of a given segment number */ |
622 | if (segno > TOTAL_SEGS(sbi) - 1) | 623 | if (segno > TOTAL_SEGS(sbi) - 1) |
623 | sbi->need_fsck = true; | 624 | set_sbi_flag(sbi, SBI_NEED_FSCK); |
624 | } | 625 | } |
625 | #endif | 626 | #endif |
626 | 627 | ||
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f71421d70475..f2fe666a6ea9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include "segment.h" | 30 | #include "segment.h" |
31 | #include "xattr.h" | 31 | #include "xattr.h" |
32 | #include "gc.h" | 32 | #include "gc.h" |
33 | #include "trace.h" | ||
33 | 34 | ||
34 | #define CREATE_TRACE_POINTS | 35 | #define CREATE_TRACE_POINTS |
35 | #include <trace/events/f2fs.h> | 36 | #include <trace/events/f2fs.h> |
@@ -41,6 +42,7 @@ static struct kset *f2fs_kset; | |||
41 | enum { | 42 | enum { |
42 | Opt_gc_background, | 43 | Opt_gc_background, |
43 | Opt_disable_roll_forward, | 44 | Opt_disable_roll_forward, |
45 | Opt_norecovery, | ||
44 | Opt_discard, | 46 | Opt_discard, |
45 | Opt_noheap, | 47 | Opt_noheap, |
46 | Opt_user_xattr, | 48 | Opt_user_xattr, |
@@ -61,6 +63,7 @@ enum { | |||
61 | static match_table_t f2fs_tokens = { | 63 | static match_table_t f2fs_tokens = { |
62 | {Opt_gc_background, "background_gc=%s"}, | 64 | {Opt_gc_background, "background_gc=%s"}, |
63 | {Opt_disable_roll_forward, "disable_roll_forward"}, | 65 | {Opt_disable_roll_forward, "disable_roll_forward"}, |
66 | {Opt_norecovery, "norecovery"}, | ||
64 | {Opt_discard, "discard"}, | 67 | {Opt_discard, "discard"}, |
65 | {Opt_noheap, "no_heap"}, | 68 | {Opt_noheap, "no_heap"}, |
66 | {Opt_user_xattr, "user_xattr"}, | 69 | {Opt_user_xattr, "user_xattr"}, |
@@ -192,6 +195,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); | |||
192 | F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); | 195 | F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); |
193 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); | 196 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); |
194 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); | 197 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); |
198 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); | ||
195 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); | 199 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); |
196 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); | 200 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); |
197 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); | 201 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); |
@@ -207,6 +211,7 @@ static struct attribute *f2fs_attrs[] = { | |||
207 | ATTR_LIST(gc_idle), | 211 | ATTR_LIST(gc_idle), |
208 | ATTR_LIST(reclaim_segments), | 212 | ATTR_LIST(reclaim_segments), |
209 | ATTR_LIST(max_small_discards), | 213 | ATTR_LIST(max_small_discards), |
214 | ATTR_LIST(batched_trim_sections), | ||
210 | ATTR_LIST(ipu_policy), | 215 | ATTR_LIST(ipu_policy), |
211 | ATTR_LIST(min_ipu_util), | 216 | ATTR_LIST(min_ipu_util), |
212 | ATTR_LIST(min_fsync_blocks), | 217 | ATTR_LIST(min_fsync_blocks), |
@@ -286,6 +291,12 @@ static int parse_options(struct super_block *sb, char *options) | |||
286 | case Opt_disable_roll_forward: | 291 | case Opt_disable_roll_forward: |
287 | set_opt(sbi, DISABLE_ROLL_FORWARD); | 292 | set_opt(sbi, DISABLE_ROLL_FORWARD); |
288 | break; | 293 | break; |
294 | case Opt_norecovery: | ||
295 | /* this option mounts f2fs with ro */ | ||
296 | set_opt(sbi, DISABLE_ROLL_FORWARD); | ||
297 | if (!f2fs_readonly(sb)) | ||
298 | return -EINVAL; | ||
299 | break; | ||
289 | case Opt_discard: | 300 | case Opt_discard: |
290 | set_opt(sbi, DISCARD); | 301 | set_opt(sbi, DISCARD); |
291 | break; | 302 | break; |
@@ -446,8 +457,13 @@ static void f2fs_put_super(struct super_block *sb) | |||
446 | f2fs_destroy_stats(sbi); | 457 | f2fs_destroy_stats(sbi); |
447 | stop_gc_thread(sbi); | 458 | stop_gc_thread(sbi); |
448 | 459 | ||
449 | /* We don't need to do checkpoint when it's clean */ | 460 | /* |
450 | if (sbi->s_dirty) { | 461 | * We don't need to do checkpoint when superblock is clean. |
462 | * But, the previous checkpoint was not done by umount, it needs to do | ||
463 | * clean checkpoint again. | ||
464 | */ | ||
465 | if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || | ||
466 | !is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) { | ||
451 | struct cp_control cpc = { | 467 | struct cp_control cpc = { |
452 | .reason = CP_UMOUNT, | 468 | .reason = CP_UMOUNT, |
453 | }; | 469 | }; |
@@ -486,13 +502,15 @@ int f2fs_sync_fs(struct super_block *sb, int sync) | |||
486 | if (sync) { | 502 | if (sync) { |
487 | struct cp_control cpc; | 503 | struct cp_control cpc; |
488 | 504 | ||
489 | cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC; | 505 | cpc.reason = __get_cp_reason(sbi); |
506 | |||
490 | mutex_lock(&sbi->gc_mutex); | 507 | mutex_lock(&sbi->gc_mutex); |
491 | write_checkpoint(sbi, &cpc); | 508 | write_checkpoint(sbi, &cpc); |
492 | mutex_unlock(&sbi->gc_mutex); | 509 | mutex_unlock(&sbi->gc_mutex); |
493 | } else { | 510 | } else { |
494 | f2fs_balance_fs(sbi); | 511 | f2fs_balance_fs(sbi); |
495 | } | 512 | } |
513 | f2fs_trace_ios(NULL, NULL, 1); | ||
496 | 514 | ||
497 | return 0; | 515 | return 0; |
498 | } | 516 | } |
@@ -887,7 +905,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) | |||
887 | atomic_set(&sbi->nr_pages[i], 0); | 905 | atomic_set(&sbi->nr_pages[i], 0); |
888 | 906 | ||
889 | sbi->dir_level = DEF_DIR_LEVEL; | 907 | sbi->dir_level = DEF_DIR_LEVEL; |
890 | sbi->need_fsck = false; | 908 | clear_sbi_flag(sbi, SBI_NEED_FSCK); |
891 | } | 909 | } |
892 | 910 | ||
893 | /* | 911 | /* |
@@ -942,6 +960,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
942 | struct inode *root; | 960 | struct inode *root; |
943 | long err = -EINVAL; | 961 | long err = -EINVAL; |
944 | bool retry = true; | 962 | bool retry = true; |
963 | char *options = NULL; | ||
945 | int i; | 964 | int i; |
946 | 965 | ||
947 | try_onemore: | 966 | try_onemore: |
@@ -973,9 +992,15 @@ try_onemore: | |||
973 | set_opt(sbi, POSIX_ACL); | 992 | set_opt(sbi, POSIX_ACL); |
974 | #endif | 993 | #endif |
975 | /* parse mount options */ | 994 | /* parse mount options */ |
976 | err = parse_options(sb, (char *)data); | 995 | options = kstrdup((const char *)data, GFP_KERNEL); |
977 | if (err) | 996 | if (data && !options) { |
997 | err = -ENOMEM; | ||
978 | goto free_sb_buf; | 998 | goto free_sb_buf; |
999 | } | ||
1000 | |||
1001 | err = parse_options(sb, options); | ||
1002 | if (err) | ||
1003 | goto free_options; | ||
979 | 1004 | ||
980 | sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); | 1005 | sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); |
981 | sb->s_max_links = F2FS_LINK_MAX; | 1006 | sb->s_max_links = F2FS_LINK_MAX; |
@@ -998,7 +1023,7 @@ try_onemore: | |||
998 | mutex_init(&sbi->writepages); | 1023 | mutex_init(&sbi->writepages); |
999 | mutex_init(&sbi->cp_mutex); | 1024 | mutex_init(&sbi->cp_mutex); |
1000 | init_rwsem(&sbi->node_write); | 1025 | init_rwsem(&sbi->node_write); |
1001 | sbi->por_doing = false; | 1026 | clear_sbi_flag(sbi, SBI_POR_DOING); |
1002 | spin_lock_init(&sbi->stat_lock); | 1027 | spin_lock_init(&sbi->stat_lock); |
1003 | 1028 | ||
1004 | init_rwsem(&sbi->read_io.io_rwsem); | 1029 | init_rwsem(&sbi->read_io.io_rwsem); |
@@ -1019,7 +1044,7 @@ try_onemore: | |||
1019 | if (IS_ERR(sbi->meta_inode)) { | 1044 | if (IS_ERR(sbi->meta_inode)) { |
1020 | f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); | 1045 | f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); |
1021 | err = PTR_ERR(sbi->meta_inode); | 1046 | err = PTR_ERR(sbi->meta_inode); |
1022 | goto free_sb_buf; | 1047 | goto free_options; |
1023 | } | 1048 | } |
1024 | 1049 | ||
1025 | err = get_valid_checkpoint(sbi); | 1050 | err = get_valid_checkpoint(sbi); |
@@ -1122,10 +1147,19 @@ try_onemore: | |||
1122 | goto free_proc; | 1147 | goto free_proc; |
1123 | 1148 | ||
1124 | if (!retry) | 1149 | if (!retry) |
1125 | sbi->need_fsck = true; | 1150 | set_sbi_flag(sbi, SBI_NEED_FSCK); |
1126 | 1151 | ||
1127 | /* recover fsynced data */ | 1152 | /* recover fsynced data */ |
1128 | if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { | 1153 | if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { |
1154 | /* | ||
1155 | * mount should be failed, when device has readonly mode, and | ||
1156 | * previous checkpoint was not done by clean system shutdown. | ||
1157 | */ | ||
1158 | if (bdev_read_only(sb->s_bdev) && | ||
1159 | !is_set_ckpt_flags(sbi->ckpt, CP_UMOUNT_FLAG)) { | ||
1160 | err = -EROFS; | ||
1161 | goto free_kobj; | ||
1162 | } | ||
1129 | err = recover_fsync_data(sbi); | 1163 | err = recover_fsync_data(sbi); |
1130 | if (err) { | 1164 | if (err) { |
1131 | f2fs_msg(sb, KERN_ERR, | 1165 | f2fs_msg(sb, KERN_ERR, |
@@ -1144,6 +1178,7 @@ try_onemore: | |||
1144 | if (err) | 1178 | if (err) |
1145 | goto free_kobj; | 1179 | goto free_kobj; |
1146 | } | 1180 | } |
1181 | kfree(options); | ||
1147 | return 0; | 1182 | return 0; |
1148 | 1183 | ||
1149 | free_kobj: | 1184 | free_kobj: |
@@ -1168,6 +1203,8 @@ free_cp: | |||
1168 | free_meta_inode: | 1203 | free_meta_inode: |
1169 | make_bad_inode(sbi->meta_inode); | 1204 | make_bad_inode(sbi->meta_inode); |
1170 | iput(sbi->meta_inode); | 1205 | iput(sbi->meta_inode); |
1206 | free_options: | ||
1207 | kfree(options); | ||
1171 | free_sb_buf: | 1208 | free_sb_buf: |
1172 | brelse(raw_super_buf); | 1209 | brelse(raw_super_buf); |
1173 | free_sbi: | 1210 | free_sbi: |
@@ -1188,11 +1225,18 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, | |||
1188 | return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); | 1225 | return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); |
1189 | } | 1226 | } |
1190 | 1227 | ||
1228 | static void kill_f2fs_super(struct super_block *sb) | ||
1229 | { | ||
1230 | if (sb->s_root) | ||
1231 | set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE); | ||
1232 | kill_block_super(sb); | ||
1233 | } | ||
1234 | |||
1191 | static struct file_system_type f2fs_fs_type = { | 1235 | static struct file_system_type f2fs_fs_type = { |
1192 | .owner = THIS_MODULE, | 1236 | .owner = THIS_MODULE, |
1193 | .name = "f2fs", | 1237 | .name = "f2fs", |
1194 | .mount = f2fs_mount, | 1238 | .mount = f2fs_mount, |
1195 | .kill_sb = kill_block_super, | 1239 | .kill_sb = kill_f2fs_super, |
1196 | .fs_flags = FS_REQUIRES_DEV, | 1240 | .fs_flags = FS_REQUIRES_DEV, |
1197 | }; | 1241 | }; |
1198 | MODULE_ALIAS_FS("f2fs"); | 1242 | MODULE_ALIAS_FS("f2fs"); |
@@ -1220,6 +1264,8 @@ static int __init init_f2fs_fs(void) | |||
1220 | { | 1264 | { |
1221 | int err; | 1265 | int err; |
1222 | 1266 | ||
1267 | f2fs_build_trace_ios(); | ||
1268 | |||
1223 | err = init_inodecache(); | 1269 | err = init_inodecache(); |
1224 | if (err) | 1270 | if (err) |
1225 | goto fail; | 1271 | goto fail; |
@@ -1229,12 +1275,9 @@ static int __init init_f2fs_fs(void) | |||
1229 | err = create_segment_manager_caches(); | 1275 | err = create_segment_manager_caches(); |
1230 | if (err) | 1276 | if (err) |
1231 | goto free_node_manager_caches; | 1277 | goto free_node_manager_caches; |
1232 | err = create_gc_caches(); | ||
1233 | if (err) | ||
1234 | goto free_segment_manager_caches; | ||
1235 | err = create_checkpoint_caches(); | 1278 | err = create_checkpoint_caches(); |
1236 | if (err) | 1279 | if (err) |
1237 | goto free_gc_caches; | 1280 | goto free_segment_manager_caches; |
1238 | f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); | 1281 | f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); |
1239 | if (!f2fs_kset) { | 1282 | if (!f2fs_kset) { |
1240 | err = -ENOMEM; | 1283 | err = -ENOMEM; |
@@ -1251,8 +1294,6 @@ free_kset: | |||
1251 | kset_unregister(f2fs_kset); | 1294 | kset_unregister(f2fs_kset); |
1252 | free_checkpoint_caches: | 1295 | free_checkpoint_caches: |
1253 | destroy_checkpoint_caches(); | 1296 | destroy_checkpoint_caches(); |
1254 | free_gc_caches: | ||
1255 | destroy_gc_caches(); | ||
1256 | free_segment_manager_caches: | 1297 | free_segment_manager_caches: |
1257 | destroy_segment_manager_caches(); | 1298 | destroy_segment_manager_caches(); |
1258 | free_node_manager_caches: | 1299 | free_node_manager_caches: |
@@ -1269,11 +1310,11 @@ static void __exit exit_f2fs_fs(void) | |||
1269 | f2fs_destroy_root_stats(); | 1310 | f2fs_destroy_root_stats(); |
1270 | unregister_filesystem(&f2fs_fs_type); | 1311 | unregister_filesystem(&f2fs_fs_type); |
1271 | destroy_checkpoint_caches(); | 1312 | destroy_checkpoint_caches(); |
1272 | destroy_gc_caches(); | ||
1273 | destroy_segment_manager_caches(); | 1313 | destroy_segment_manager_caches(); |
1274 | destroy_node_manager_caches(); | 1314 | destroy_node_manager_caches(); |
1275 | destroy_inodecache(); | 1315 | destroy_inodecache(); |
1276 | kset_unregister(f2fs_kset); | 1316 | kset_unregister(f2fs_kset); |
1317 | f2fs_destroy_trace_ios(); | ||
1277 | } | 1318 | } |
1278 | 1319 | ||
1279 | module_init(init_f2fs_fs) | 1320 | module_init(init_f2fs_fs) |
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c new file mode 100644 index 000000000000..875aa8179bc1 --- /dev/null +++ b/fs/f2fs/trace.c | |||
@@ -0,0 +1,159 @@ | |||
1 | /* | ||
2 | * f2fs IO tracer | ||
3 | * | ||
4 | * Copyright (c) 2014 Motorola Mobility | ||
5 | * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | #include <linux/fs.h> | ||
12 | #include <linux/f2fs_fs.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/radix-tree.h> | ||
15 | |||
16 | #include "f2fs.h" | ||
17 | #include "trace.h" | ||
18 | |||
19 | static RADIX_TREE(pids, GFP_ATOMIC); | ||
20 | static spinlock_t pids_lock; | ||
21 | static struct last_io_info last_io; | ||
22 | |||
23 | static inline void __print_last_io(void) | ||
24 | { | ||
25 | if (!last_io.len) | ||
26 | return; | ||
27 | |||
28 | trace_printk("%3x:%3x %4x %-16s %2x %5x %12x %4x\n", | ||
29 | last_io.major, last_io.minor, | ||
30 | last_io.pid, "----------------", | ||
31 | last_io.type, | ||
32 | last_io.fio.rw, last_io.fio.blk_addr, | ||
33 | last_io.len); | ||
34 | memset(&last_io, 0, sizeof(last_io)); | ||
35 | } | ||
36 | |||
37 | static int __file_type(struct inode *inode, pid_t pid) | ||
38 | { | ||
39 | if (f2fs_is_atomic_file(inode)) | ||
40 | return __ATOMIC_FILE; | ||
41 | else if (f2fs_is_volatile_file(inode)) | ||
42 | return __VOLATILE_FILE; | ||
43 | else if (S_ISDIR(inode->i_mode)) | ||
44 | return __DIR_FILE; | ||
45 | else if (inode->i_ino == F2FS_NODE_INO(F2FS_I_SB(inode))) | ||
46 | return __NODE_FILE; | ||
47 | else if (inode->i_ino == F2FS_META_INO(F2FS_I_SB(inode))) | ||
48 | return __META_FILE; | ||
49 | else if (pid) | ||
50 | return __NORMAL_FILE; | ||
51 | else | ||
52 | return __MISC_FILE; | ||
53 | } | ||
54 | |||
55 | void f2fs_trace_pid(struct page *page) | ||
56 | { | ||
57 | struct inode *inode = page->mapping->host; | ||
58 | pid_t pid = task_pid_nr(current); | ||
59 | void *p; | ||
60 | |||
61 | page->private = pid; | ||
62 | |||
63 | if (radix_tree_preload(GFP_NOFS)) | ||
64 | return; | ||
65 | |||
66 | spin_lock(&pids_lock); | ||
67 | p = radix_tree_lookup(&pids, pid); | ||
68 | if (p == current) | ||
69 | goto out; | ||
70 | if (p) | ||
71 | radix_tree_delete(&pids, pid); | ||
72 | |||
73 | f2fs_radix_tree_insert(&pids, pid, current); | ||
74 | |||
75 | trace_printk("%3x:%3x %4x %-16s\n", | ||
76 | MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), | ||
77 | pid, current->comm); | ||
78 | out: | ||
79 | spin_unlock(&pids_lock); | ||
80 | radix_tree_preload_end(); | ||
81 | } | ||
82 | |||
83 | void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) | ||
84 | { | ||
85 | struct inode *inode; | ||
86 | pid_t pid; | ||
87 | int major, minor; | ||
88 | |||
89 | if (flush) { | ||
90 | __print_last_io(); | ||
91 | return; | ||
92 | } | ||
93 | |||
94 | inode = page->mapping->host; | ||
95 | pid = page_private(page); | ||
96 | |||
97 | major = MAJOR(inode->i_sb->s_dev); | ||
98 | minor = MINOR(inode->i_sb->s_dev); | ||
99 | |||
100 | if (last_io.major == major && last_io.minor == minor && | ||
101 | last_io.pid == pid && | ||
102 | last_io.type == __file_type(inode, pid) && | ||
103 | last_io.fio.rw == fio->rw && | ||
104 | last_io.fio.blk_addr + last_io.len == fio->blk_addr) { | ||
105 | last_io.len++; | ||
106 | return; | ||
107 | } | ||
108 | |||
109 | __print_last_io(); | ||
110 | |||
111 | last_io.major = major; | ||
112 | last_io.minor = minor; | ||
113 | last_io.pid = pid; | ||
114 | last_io.type = __file_type(inode, pid); | ||
115 | last_io.fio = *fio; | ||
116 | last_io.len = 1; | ||
117 | return; | ||
118 | } | ||
119 | |||
120 | void f2fs_build_trace_ios(void) | ||
121 | { | ||
122 | spin_lock_init(&pids_lock); | ||
123 | } | ||
124 | |||
125 | #define PIDVEC_SIZE 128 | ||
126 | static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index, | ||
127 | unsigned int max_items) | ||
128 | { | ||
129 | struct radix_tree_iter iter; | ||
130 | void **slot; | ||
131 | unsigned int ret = 0; | ||
132 | |||
133 | if (unlikely(!max_items)) | ||
134 | return 0; | ||
135 | |||
136 | radix_tree_for_each_slot(slot, &pids, &iter, first_index) { | ||
137 | results[ret] = iter.index; | ||
138 | if (++ret == PIDVEC_SIZE) | ||
139 | break; | ||
140 | } | ||
141 | return ret; | ||
142 | } | ||
143 | |||
144 | void f2fs_destroy_trace_ios(void) | ||
145 | { | ||
146 | pid_t pid[PIDVEC_SIZE]; | ||
147 | pid_t next_pid = 0; | ||
148 | unsigned int found; | ||
149 | |||
150 | spin_lock(&pids_lock); | ||
151 | while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) { | ||
152 | unsigned idx; | ||
153 | |||
154 | next_pid = pid[found - 1] + 1; | ||
155 | for (idx = 0; idx < found; idx++) | ||
156 | radix_tree_delete(&pids, pid[idx]); | ||
157 | } | ||
158 | spin_unlock(&pids_lock); | ||
159 | } | ||
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h new file mode 100644 index 000000000000..1041dbeb52ae --- /dev/null +++ b/fs/f2fs/trace.h | |||
@@ -0,0 +1,46 @@ | |||
1 | /* | ||
2 | * f2fs IO tracer | ||
3 | * | ||
4 | * Copyright (c) 2014 Motorola Mobility | ||
5 | * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | #ifndef __F2FS_TRACE_H__ | ||
12 | #define __F2FS_TRACE_H__ | ||
13 | |||
14 | #ifdef CONFIG_F2FS_IO_TRACE | ||
15 | #include <trace/events/f2fs.h> | ||
16 | |||
17 | enum file_type { | ||
18 | __NORMAL_FILE, | ||
19 | __DIR_FILE, | ||
20 | __NODE_FILE, | ||
21 | __META_FILE, | ||
22 | __ATOMIC_FILE, | ||
23 | __VOLATILE_FILE, | ||
24 | __MISC_FILE, | ||
25 | }; | ||
26 | |||
27 | struct last_io_info { | ||
28 | int major, minor; | ||
29 | pid_t pid; | ||
30 | enum file_type type; | ||
31 | struct f2fs_io_info fio; | ||
32 | block_t len; | ||
33 | }; | ||
34 | |||
35 | extern void f2fs_trace_pid(struct page *); | ||
36 | extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int); | ||
37 | extern void f2fs_build_trace_ios(void); | ||
38 | extern void f2fs_destroy_trace_ios(void); | ||
39 | #else | ||
40 | #define f2fs_trace_pid(p) | ||
41 | #define f2fs_trace_ios(p, i, n) | ||
42 | #define f2fs_build_trace_ios() | ||
43 | #define f2fs_destroy_trace_ios() | ||
44 | |||
45 | #endif | ||
46 | #endif /* __F2FS_TRACE_H__ */ | ||
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 7b41a2dcdd76..497c7c5263c7 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -580,7 +580,7 @@ static void fat_set_state(struct super_block *sb, | |||
580 | { | 580 | { |
581 | struct buffer_head *bh; | 581 | struct buffer_head *bh; |
582 | struct fat_boot_sector *b; | 582 | struct fat_boot_sector *b; |
583 | struct msdos_sb_info *sbi = sb->s_fs_info; | 583 | struct msdos_sb_info *sbi = MSDOS_SB(sb); |
584 | 584 | ||
585 | /* do not change any thing if mounted read only */ | 585 | /* do not change any thing if mounted read only */ |
586 | if ((sb->s_flags & MS_RDONLY) && !force) | 586 | if ((sb->s_flags & MS_RDONLY) && !force) |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 2d609a5fbfea..073657f755d4 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -66,15 +66,21 @@ int writeback_in_progress(struct backing_dev_info *bdi) | |||
66 | } | 66 | } |
67 | EXPORT_SYMBOL(writeback_in_progress); | 67 | EXPORT_SYMBOL(writeback_in_progress); |
68 | 68 | ||
69 | static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) | 69 | struct backing_dev_info *inode_to_bdi(struct inode *inode) |
70 | { | 70 | { |
71 | struct super_block *sb = inode->i_sb; | 71 | struct super_block *sb; |
72 | 72 | ||
73 | if (sb_is_blkdev_sb(sb)) | 73 | if (!inode) |
74 | return inode->i_mapping->backing_dev_info; | 74 | return &noop_backing_dev_info; |
75 | 75 | ||
76 | sb = inode->i_sb; | ||
77 | #ifdef CONFIG_BLOCK | ||
78 | if (sb_is_blkdev_sb(sb)) | ||
79 | return blk_get_backing_dev_info(I_BDEV(inode)); | ||
80 | #endif | ||
76 | return sb->s_bdi; | 81 | return sb->s_bdi; |
77 | } | 82 | } |
83 | EXPORT_SYMBOL_GPL(inode_to_bdi); | ||
78 | 84 | ||
79 | static inline struct inode *wb_inode(struct list_head *head) | 85 | static inline struct inode *wb_inode(struct list_head *head) |
80 | { | 86 | { |
@@ -247,14 +253,19 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) | |||
247 | return ret; | 253 | return ret; |
248 | } | 254 | } |
249 | 255 | ||
256 | #define EXPIRE_DIRTY_ATIME 0x0001 | ||
257 | |||
250 | /* | 258 | /* |
251 | * Move expired (dirtied before work->older_than_this) dirty inodes from | 259 | * Move expired (dirtied before work->older_than_this) dirty inodes from |
252 | * @delaying_queue to @dispatch_queue. | 260 | * @delaying_queue to @dispatch_queue. |
253 | */ | 261 | */ |
254 | static int move_expired_inodes(struct list_head *delaying_queue, | 262 | static int move_expired_inodes(struct list_head *delaying_queue, |
255 | struct list_head *dispatch_queue, | 263 | struct list_head *dispatch_queue, |
264 | int flags, | ||
256 | struct wb_writeback_work *work) | 265 | struct wb_writeback_work *work) |
257 | { | 266 | { |
267 | unsigned long *older_than_this = NULL; | ||
268 | unsigned long expire_time; | ||
258 | LIST_HEAD(tmp); | 269 | LIST_HEAD(tmp); |
259 | struct list_head *pos, *node; | 270 | struct list_head *pos, *node; |
260 | struct super_block *sb = NULL; | 271 | struct super_block *sb = NULL; |
@@ -262,13 +273,21 @@ static int move_expired_inodes(struct list_head *delaying_queue, | |||
262 | int do_sb_sort = 0; | 273 | int do_sb_sort = 0; |
263 | int moved = 0; | 274 | int moved = 0; |
264 | 275 | ||
276 | if ((flags & EXPIRE_DIRTY_ATIME) == 0) | ||
277 | older_than_this = work->older_than_this; | ||
278 | else if ((work->reason == WB_REASON_SYNC) == 0) { | ||
279 | expire_time = jiffies - (HZ * 86400); | ||
280 | older_than_this = &expire_time; | ||
281 | } | ||
265 | while (!list_empty(delaying_queue)) { | 282 | while (!list_empty(delaying_queue)) { |
266 | inode = wb_inode(delaying_queue->prev); | 283 | inode = wb_inode(delaying_queue->prev); |
267 | if (work->older_than_this && | 284 | if (older_than_this && |
268 | inode_dirtied_after(inode, *work->older_than_this)) | 285 | inode_dirtied_after(inode, *older_than_this)) |
269 | break; | 286 | break; |
270 | list_move(&inode->i_wb_list, &tmp); | 287 | list_move(&inode->i_wb_list, &tmp); |
271 | moved++; | 288 | moved++; |
289 | if (flags & EXPIRE_DIRTY_ATIME) | ||
290 | set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state); | ||
272 | if (sb_is_blkdev_sb(inode->i_sb)) | 291 | if (sb_is_blkdev_sb(inode->i_sb)) |
273 | continue; | 292 | continue; |
274 | if (sb && sb != inode->i_sb) | 293 | if (sb && sb != inode->i_sb) |
@@ -309,9 +328,12 @@ out: | |||
309 | static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work) | 328 | static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work) |
310 | { | 329 | { |
311 | int moved; | 330 | int moved; |
331 | |||
312 | assert_spin_locked(&wb->list_lock); | 332 | assert_spin_locked(&wb->list_lock); |
313 | list_splice_init(&wb->b_more_io, &wb->b_io); | 333 | list_splice_init(&wb->b_more_io, &wb->b_io); |
314 | moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work); | 334 | moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work); |
335 | moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io, | ||
336 | EXPIRE_DIRTY_ATIME, work); | ||
315 | trace_writeback_queue_io(wb, work, moved); | 337 | trace_writeback_queue_io(wb, work, moved); |
316 | } | 338 | } |
317 | 339 | ||
@@ -435,6 +457,8 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, | |||
435 | * updates after data IO completion. | 457 | * updates after data IO completion. |
436 | */ | 458 | */ |
437 | redirty_tail(inode, wb); | 459 | redirty_tail(inode, wb); |
460 | } else if (inode->i_state & I_DIRTY_TIME) { | ||
461 | list_move(&inode->i_wb_list, &wb->b_dirty_time); | ||
438 | } else { | 462 | } else { |
439 | /* The inode is clean. Remove from writeback lists. */ | 463 | /* The inode is clean. Remove from writeback lists. */ |
440 | list_del_init(&inode->i_wb_list); | 464 | list_del_init(&inode->i_wb_list); |
@@ -481,7 +505,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
481 | spin_lock(&inode->i_lock); | 505 | spin_lock(&inode->i_lock); |
482 | 506 | ||
483 | dirty = inode->i_state & I_DIRTY; | 507 | dirty = inode->i_state & I_DIRTY; |
484 | inode->i_state &= ~I_DIRTY; | 508 | if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) && |
509 | (inode->i_state & I_DIRTY_TIME)) || | ||
510 | (inode->i_state & I_DIRTY_TIME_EXPIRED)) { | ||
511 | dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; | ||
512 | trace_writeback_lazytime(inode); | ||
513 | } | ||
514 | inode->i_state &= ~dirty; | ||
485 | 515 | ||
486 | /* | 516 | /* |
487 | * Paired with smp_mb() in __mark_inode_dirty(). This allows | 517 | * Paired with smp_mb() in __mark_inode_dirty(). This allows |
@@ -501,8 +531,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
501 | 531 | ||
502 | spin_unlock(&inode->i_lock); | 532 | spin_unlock(&inode->i_lock); |
503 | 533 | ||
534 | if (dirty & I_DIRTY_TIME) | ||
535 | mark_inode_dirty_sync(inode); | ||
504 | /* Don't write the inode if only I_DIRTY_PAGES was set */ | 536 | /* Don't write the inode if only I_DIRTY_PAGES was set */ |
505 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 537 | if (dirty & ~I_DIRTY_PAGES) { |
506 | int err = write_inode(inode, wbc); | 538 | int err = write_inode(inode, wbc); |
507 | if (ret == 0) | 539 | if (ret == 0) |
508 | ret = err; | 540 | ret = err; |
@@ -550,7 +582,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, | |||
550 | * make sure inode is on some writeback list and leave it there unless | 582 | * make sure inode is on some writeback list and leave it there unless |
551 | * we have completely cleaned the inode. | 583 | * we have completely cleaned the inode. |
552 | */ | 584 | */ |
553 | if (!(inode->i_state & I_DIRTY) && | 585 | if (!(inode->i_state & I_DIRTY_ALL) && |
554 | (wbc->sync_mode != WB_SYNC_ALL || | 586 | (wbc->sync_mode != WB_SYNC_ALL || |
555 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) | 587 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) |
556 | goto out; | 588 | goto out; |
@@ -565,7 +597,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, | |||
565 | * If inode is clean, remove it from writeback lists. Otherwise don't | 597 | * If inode is clean, remove it from writeback lists. Otherwise don't |
566 | * touch it. See comment above for explanation. | 598 | * touch it. See comment above for explanation. |
567 | */ | 599 | */ |
568 | if (!(inode->i_state & I_DIRTY)) | 600 | if (!(inode->i_state & I_DIRTY_ALL)) |
569 | list_del_init(&inode->i_wb_list); | 601 | list_del_init(&inode->i_wb_list); |
570 | spin_unlock(&wb->list_lock); | 602 | spin_unlock(&wb->list_lock); |
571 | inode_sync_complete(inode); | 603 | inode_sync_complete(inode); |
@@ -707,7 +739,7 @@ static long writeback_sb_inodes(struct super_block *sb, | |||
707 | wrote += write_chunk - wbc.nr_to_write; | 739 | wrote += write_chunk - wbc.nr_to_write; |
708 | spin_lock(&wb->list_lock); | 740 | spin_lock(&wb->list_lock); |
709 | spin_lock(&inode->i_lock); | 741 | spin_lock(&inode->i_lock); |
710 | if (!(inode->i_state & I_DIRTY)) | 742 | if (!(inode->i_state & I_DIRTY_ALL)) |
711 | wrote++; | 743 | wrote++; |
712 | requeue_inode(inode, wb, &wbc); | 744 | requeue_inode(inode, wb, &wbc); |
713 | inode_sync_complete(inode); | 745 | inode_sync_complete(inode); |
@@ -1145,16 +1177,20 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) | |||
1145 | * page->mapping->host, so the page-dirtying time is recorded in the internal | 1177 | * page->mapping->host, so the page-dirtying time is recorded in the internal |
1146 | * blockdev inode. | 1178 | * blockdev inode. |
1147 | */ | 1179 | */ |
1180 | #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) | ||
1148 | void __mark_inode_dirty(struct inode *inode, int flags) | 1181 | void __mark_inode_dirty(struct inode *inode, int flags) |
1149 | { | 1182 | { |
1150 | struct super_block *sb = inode->i_sb; | 1183 | struct super_block *sb = inode->i_sb; |
1151 | struct backing_dev_info *bdi = NULL; | 1184 | struct backing_dev_info *bdi = NULL; |
1185 | int dirtytime; | ||
1186 | |||
1187 | trace_writeback_mark_inode_dirty(inode, flags); | ||
1152 | 1188 | ||
1153 | /* | 1189 | /* |
1154 | * Don't do this for I_DIRTY_PAGES - that doesn't actually | 1190 | * Don't do this for I_DIRTY_PAGES - that doesn't actually |
1155 | * dirty the inode itself | 1191 | * dirty the inode itself |
1156 | */ | 1192 | */ |
1157 | if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 1193 | if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) { |
1158 | trace_writeback_dirty_inode_start(inode, flags); | 1194 | trace_writeback_dirty_inode_start(inode, flags); |
1159 | 1195 | ||
1160 | if (sb->s_op->dirty_inode) | 1196 | if (sb->s_op->dirty_inode) |
@@ -1162,6 +1198,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1162 | 1198 | ||
1163 | trace_writeback_dirty_inode(inode, flags); | 1199 | trace_writeback_dirty_inode(inode, flags); |
1164 | } | 1200 | } |
1201 | if (flags & I_DIRTY_INODE) | ||
1202 | flags &= ~I_DIRTY_TIME; | ||
1203 | dirtytime = flags & I_DIRTY_TIME; | ||
1165 | 1204 | ||
1166 | /* | 1205 | /* |
1167 | * Paired with smp_mb() in __writeback_single_inode() for the | 1206 | * Paired with smp_mb() in __writeback_single_inode() for the |
@@ -1169,16 +1208,21 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1169 | */ | 1208 | */ |
1170 | smp_mb(); | 1209 | smp_mb(); |
1171 | 1210 | ||
1172 | if ((inode->i_state & flags) == flags) | 1211 | if (((inode->i_state & flags) == flags) || |
1212 | (dirtytime && (inode->i_state & I_DIRTY_INODE))) | ||
1173 | return; | 1213 | return; |
1174 | 1214 | ||
1175 | if (unlikely(block_dump)) | 1215 | if (unlikely(block_dump)) |
1176 | block_dump___mark_inode_dirty(inode); | 1216 | block_dump___mark_inode_dirty(inode); |
1177 | 1217 | ||
1178 | spin_lock(&inode->i_lock); | 1218 | spin_lock(&inode->i_lock); |
1219 | if (dirtytime && (inode->i_state & I_DIRTY_INODE)) | ||
1220 | goto out_unlock_inode; | ||
1179 | if ((inode->i_state & flags) != flags) { | 1221 | if ((inode->i_state & flags) != flags) { |
1180 | const int was_dirty = inode->i_state & I_DIRTY; | 1222 | const int was_dirty = inode->i_state & I_DIRTY; |
1181 | 1223 | ||
1224 | if (flags & I_DIRTY_INODE) | ||
1225 | inode->i_state &= ~I_DIRTY_TIME; | ||
1182 | inode->i_state |= flags; | 1226 | inode->i_state |= flags; |
1183 | 1227 | ||
1184 | /* | 1228 | /* |
@@ -1225,8 +1269,10 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1225 | } | 1269 | } |
1226 | 1270 | ||
1227 | inode->dirtied_when = jiffies; | 1271 | inode->dirtied_when = jiffies; |
1228 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); | 1272 | list_move(&inode->i_wb_list, dirtytime ? |
1273 | &bdi->wb.b_dirty_time : &bdi->wb.b_dirty); | ||
1229 | spin_unlock(&bdi->wb.list_lock); | 1274 | spin_unlock(&bdi->wb.list_lock); |
1275 | trace_writeback_dirty_inode_enqueue(inode); | ||
1230 | 1276 | ||
1231 | if (wakeup_bdi) | 1277 | if (wakeup_bdi) |
1232 | bdi_wakeup_thread_delayed(bdi); | 1278 | bdi_wakeup_thread_delayed(bdi); |
diff --git a/fs/fs_pin.c b/fs/fs_pin.c index 9368236ca100..b06c98796afb 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c | |||
@@ -1,78 +1,102 @@ | |||
1 | #include <linux/fs.h> | 1 | #include <linux/fs.h> |
2 | #include <linux/sched.h> | ||
2 | #include <linux/slab.h> | 3 | #include <linux/slab.h> |
3 | #include <linux/fs_pin.h> | ||
4 | #include "internal.h" | 4 | #include "internal.h" |
5 | #include "mount.h" | 5 | #include "mount.h" |
6 | 6 | ||
7 | static void pin_free_rcu(struct rcu_head *head) | ||
8 | { | ||
9 | kfree(container_of(head, struct fs_pin, rcu)); | ||
10 | } | ||
11 | |||
12 | static DEFINE_SPINLOCK(pin_lock); | 7 | static DEFINE_SPINLOCK(pin_lock); |
13 | 8 | ||
14 | void pin_put(struct fs_pin *p) | ||
15 | { | ||
16 | if (atomic_long_dec_and_test(&p->count)) | ||
17 | call_rcu(&p->rcu, pin_free_rcu); | ||
18 | } | ||
19 | |||
20 | void pin_remove(struct fs_pin *pin) | 9 | void pin_remove(struct fs_pin *pin) |
21 | { | 10 | { |
22 | spin_lock(&pin_lock); | 11 | spin_lock(&pin_lock); |
23 | hlist_del(&pin->m_list); | 12 | hlist_del(&pin->m_list); |
24 | hlist_del(&pin->s_list); | 13 | hlist_del(&pin->s_list); |
25 | spin_unlock(&pin_lock); | 14 | spin_unlock(&pin_lock); |
15 | spin_lock_irq(&pin->wait.lock); | ||
16 | pin->done = 1; | ||
17 | wake_up_locked(&pin->wait); | ||
18 | spin_unlock_irq(&pin->wait.lock); | ||
26 | } | 19 | } |
27 | 20 | ||
28 | void pin_insert(struct fs_pin *pin, struct vfsmount *m) | 21 | void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p) |
29 | { | 22 | { |
30 | spin_lock(&pin_lock); | 23 | spin_lock(&pin_lock); |
31 | hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins); | 24 | if (p) |
25 | hlist_add_head(&pin->s_list, p); | ||
32 | hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins); | 26 | hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins); |
33 | spin_unlock(&pin_lock); | 27 | spin_unlock(&pin_lock); |
34 | } | 28 | } |
35 | 29 | ||
30 | void pin_insert(struct fs_pin *pin, struct vfsmount *m) | ||
31 | { | ||
32 | pin_insert_group(pin, m, &m->mnt_sb->s_pins); | ||
33 | } | ||
34 | |||
35 | void pin_kill(struct fs_pin *p) | ||
36 | { | ||
37 | wait_queue_t wait; | ||
38 | |||
39 | if (!p) { | ||
40 | rcu_read_unlock(); | ||
41 | return; | ||
42 | } | ||
43 | init_wait(&wait); | ||
44 | spin_lock_irq(&p->wait.lock); | ||
45 | if (likely(!p->done)) { | ||
46 | p->done = -1; | ||
47 | spin_unlock_irq(&p->wait.lock); | ||
48 | rcu_read_unlock(); | ||
49 | p->kill(p); | ||
50 | return; | ||
51 | } | ||
52 | if (p->done > 0) { | ||
53 | spin_unlock_irq(&p->wait.lock); | ||
54 | rcu_read_unlock(); | ||
55 | return; | ||
56 | } | ||
57 | __add_wait_queue(&p->wait, &wait); | ||
58 | while (1) { | ||
59 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
60 | spin_unlock_irq(&p->wait.lock); | ||
61 | rcu_read_unlock(); | ||
62 | schedule(); | ||
63 | rcu_read_lock(); | ||
64 | if (likely(list_empty(&wait.task_list))) | ||
65 | break; | ||
66 | /* OK, we know p couldn't have been freed yet */ | ||
67 | spin_lock_irq(&p->wait.lock); | ||
68 | if (p->done > 0) { | ||
69 | spin_unlock_irq(&p->wait.lock); | ||
70 | break; | ||
71 | } | ||
72 | } | ||
73 | rcu_read_unlock(); | ||
74 | } | ||
75 | |||
36 | void mnt_pin_kill(struct mount *m) | 76 | void mnt_pin_kill(struct mount *m) |
37 | { | 77 | { |
38 | while (1) { | 78 | while (1) { |
39 | struct hlist_node *p; | 79 | struct hlist_node *p; |
40 | struct fs_pin *pin; | ||
41 | rcu_read_lock(); | 80 | rcu_read_lock(); |
42 | p = ACCESS_ONCE(m->mnt_pins.first); | 81 | p = ACCESS_ONCE(m->mnt_pins.first); |
43 | if (!p) { | 82 | if (!p) { |
44 | rcu_read_unlock(); | 83 | rcu_read_unlock(); |
45 | break; | 84 | break; |
46 | } | 85 | } |
47 | pin = hlist_entry(p, struct fs_pin, m_list); | 86 | pin_kill(hlist_entry(p, struct fs_pin, m_list)); |
48 | if (!atomic_long_inc_not_zero(&pin->count)) { | ||
49 | rcu_read_unlock(); | ||
50 | cpu_relax(); | ||
51 | continue; | ||
52 | } | ||
53 | rcu_read_unlock(); | ||
54 | pin->kill(pin); | ||
55 | } | 87 | } |
56 | } | 88 | } |
57 | 89 | ||
58 | void sb_pin_kill(struct super_block *sb) | 90 | void group_pin_kill(struct hlist_head *p) |
59 | { | 91 | { |
60 | while (1) { | 92 | while (1) { |
61 | struct hlist_node *p; | 93 | struct hlist_node *q; |
62 | struct fs_pin *pin; | ||
63 | rcu_read_lock(); | 94 | rcu_read_lock(); |
64 | p = ACCESS_ONCE(sb->s_pins.first); | 95 | q = ACCESS_ONCE(p->first); |
65 | if (!p) { | 96 | if (!q) { |
66 | rcu_read_unlock(); | 97 | rcu_read_unlock(); |
67 | break; | 98 | break; |
68 | } | 99 | } |
69 | pin = hlist_entry(p, struct fs_pin, s_list); | 100 | pin_kill(hlist_entry(q, struct fs_pin, s_list)); |
70 | if (!atomic_long_inc_not_zero(&pin->count)) { | ||
71 | rcu_read_unlock(); | ||
72 | cpu_relax(); | ||
73 | continue; | ||
74 | } | ||
75 | rcu_read_unlock(); | ||
76 | pin->kill(pin); | ||
77 | } | 101 | } |
78 | } | 102 | } |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 760b2c552197..c01ec3bdcfd8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -1159,7 +1159,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
1159 | mutex_lock(&inode->i_mutex); | 1159 | mutex_lock(&inode->i_mutex); |
1160 | 1160 | ||
1161 | /* We can write back this queue in page reclaim */ | 1161 | /* We can write back this queue in page reclaim */ |
1162 | current->backing_dev_info = mapping->backing_dev_info; | 1162 | current->backing_dev_info = inode_to_bdi(inode); |
1163 | 1163 | ||
1164 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 1164 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
1165 | if (err) | 1165 | if (err) |
@@ -1464,7 +1464,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) | |||
1464 | { | 1464 | { |
1465 | struct inode *inode = req->inode; | 1465 | struct inode *inode = req->inode; |
1466 | struct fuse_inode *fi = get_fuse_inode(inode); | 1466 | struct fuse_inode *fi = get_fuse_inode(inode); |
1467 | struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; | 1467 | struct backing_dev_info *bdi = inode_to_bdi(inode); |
1468 | int i; | 1468 | int i; |
1469 | 1469 | ||
1470 | list_del(&req->writepages_entry); | 1470 | list_del(&req->writepages_entry); |
@@ -1658,7 +1658,7 @@ static int fuse_writepage_locked(struct page *page) | |||
1658 | req->end = fuse_writepage_end; | 1658 | req->end = fuse_writepage_end; |
1659 | req->inode = inode; | 1659 | req->inode = inode; |
1660 | 1660 | ||
1661 | inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK); | 1661 | inc_bdi_stat(inode_to_bdi(inode), BDI_WRITEBACK); |
1662 | inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); | 1662 | inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); |
1663 | 1663 | ||
1664 | spin_lock(&fc->lock); | 1664 | spin_lock(&fc->lock); |
@@ -1768,7 +1768,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, | |||
1768 | 1768 | ||
1769 | if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT || | 1769 | if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT || |
1770 | old_req->state == FUSE_REQ_PENDING)) { | 1770 | old_req->state == FUSE_REQ_PENDING)) { |
1771 | struct backing_dev_info *bdi = page->mapping->backing_dev_info; | 1771 | struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host); |
1772 | 1772 | ||
1773 | copy_highpage(old_req->pages[0], page); | 1773 | copy_highpage(old_req->pages[0], page); |
1774 | spin_unlock(&fc->lock); | 1774 | spin_unlock(&fc->lock); |
@@ -1872,7 +1872,7 @@ static int fuse_writepages_fill(struct page *page, | |||
1872 | req->page_descs[req->num_pages].offset = 0; | 1872 | req->page_descs[req->num_pages].offset = 0; |
1873 | req->page_descs[req->num_pages].length = PAGE_SIZE; | 1873 | req->page_descs[req->num_pages].length = PAGE_SIZE; |
1874 | 1874 | ||
1875 | inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK); | 1875 | inc_bdi_stat(inode_to_bdi(inode), BDI_WRITEBACK); |
1876 | inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); | 1876 | inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); |
1877 | 1877 | ||
1878 | err = 0; | 1878 | err = 0; |
@@ -2062,7 +2062,6 @@ static const struct vm_operations_struct fuse_file_vm_ops = { | |||
2062 | .fault = filemap_fault, | 2062 | .fault = filemap_fault, |
2063 | .map_pages = filemap_map_pages, | 2063 | .map_pages = filemap_map_pages, |
2064 | .page_mkwrite = fuse_page_mkwrite, | 2064 | .page_mkwrite = fuse_page_mkwrite, |
2065 | .remap_pages = generic_file_remap_pages, | ||
2066 | }; | 2065 | }; |
2067 | 2066 | ||
2068 | static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) | 2067 | static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f38256e4476e..e8799c11424b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -308,7 +308,6 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, | |||
308 | if (!fc->writeback_cache || !S_ISREG(attr->mode)) | 308 | if (!fc->writeback_cache || !S_ISREG(attr->mode)) |
309 | inode->i_flags |= S_NOCMTIME; | 309 | inode->i_flags |= S_NOCMTIME; |
310 | inode->i_generation = generation; | 310 | inode->i_generation = generation; |
311 | inode->i_data.backing_dev_info = &fc->bdi; | ||
312 | fuse_init_inode(inode, attr); | 311 | fuse_init_inode(inode, attr); |
313 | unlock_new_inode(inode); | 312 | unlock_new_inode(inode); |
314 | } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { | 313 | } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { |
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 3088e2a38e30..7b3143064af1 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c | |||
@@ -73,7 +73,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
73 | 73 | ||
74 | BUG_ON(name == NULL); | 74 | BUG_ON(name == NULL); |
75 | 75 | ||
76 | if (acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode))) | 76 | if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode))) |
77 | return -E2BIG; | 77 | return -E2BIG; |
78 | 78 | ||
79 | if (type == ACL_TYPE_ACCESS) { | 79 | if (type == ACL_TYPE_ACCESS) { |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 805b37fed638..4ad4f94edebe 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -289,7 +289,7 @@ continue_unlock: | |||
289 | if (!clear_page_dirty_for_io(page)) | 289 | if (!clear_page_dirty_for_io(page)) |
290 | goto continue_unlock; | 290 | goto continue_unlock; |
291 | 291 | ||
292 | trace_wbc_writepage(wbc, mapping->backing_dev_info); | 292 | trace_wbc_writepage(wbc, inode_to_bdi(inode)); |
293 | 293 | ||
294 | ret = __gfs2_jdata_writepage(page, wbc); | 294 | ret = __gfs2_jdata_writepage(page, wbc); |
295 | if (unlikely(ret)) { | 295 | if (unlikely(ret)) { |
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index c5a34f09e228..6371192961e2 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -1896,7 +1896,8 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, | |||
1896 | 1896 | ||
1897 | ht = kzalloc(size, GFP_NOFS | __GFP_NOWARN); | 1897 | ht = kzalloc(size, GFP_NOFS | __GFP_NOWARN); |
1898 | if (ht == NULL) | 1898 | if (ht == NULL) |
1899 | ht = vzalloc(size); | 1899 | ht = __vmalloc(size, GFP_NOFS | __GFP_NOWARN | __GFP_ZERO, |
1900 | PAGE_KERNEL); | ||
1900 | if (!ht) | 1901 | if (!ht) |
1901 | return -ENOMEM; | 1902 | return -ENOMEM; |
1902 | 1903 | ||
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 6e600abf694a..3e32bb8e2d7e 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -498,7 +498,6 @@ static const struct vm_operations_struct gfs2_vm_ops = { | |||
498 | .fault = filemap_fault, | 498 | .fault = filemap_fault, |
499 | .map_pages = filemap_map_pages, | 499 | .map_pages = filemap_map_pages, |
500 | .page_mkwrite = gfs2_page_mkwrite, | 500 | .page_mkwrite = gfs2_page_mkwrite, |
501 | .remap_pages = generic_file_remap_pages, | ||
502 | }; | 501 | }; |
503 | 502 | ||
504 | /** | 503 | /** |
@@ -655,7 +654,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, | |||
655 | { | 654 | { |
656 | struct address_space *mapping = file->f_mapping; | 655 | struct address_space *mapping = file->f_mapping; |
657 | struct inode *inode = mapping->host; | 656 | struct inode *inode = mapping->host; |
658 | int sync_state = inode->i_state & I_DIRTY; | 657 | int sync_state = inode->i_state & I_DIRTY_ALL; |
659 | struct gfs2_inode *ip = GFS2_I(inode); | 658 | struct gfs2_inode *ip = GFS2_I(inode); |
660 | int ret = 0, ret1 = 0; | 659 | int ret = 0, ret1 = 0; |
661 | 660 | ||
@@ -668,7 +667,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, | |||
668 | if (!gfs2_is_jdata(ip)) | 667 | if (!gfs2_is_jdata(ip)) |
669 | sync_state &= ~I_DIRTY_PAGES; | 668 | sync_state &= ~I_DIRTY_PAGES; |
670 | if (datasync) | 669 | if (datasync) |
671 | sync_state &= ~I_DIRTY_SYNC; | 670 | sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME); |
672 | 671 | ||
673 | if (sync_state) { | 672 | if (sync_state) { |
674 | ret = sync_inode_metadata(inode, 1); | 673 | ret = sync_inode_metadata(inode, 1); |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a23524aa3eac..f42dffba056a 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -173,19 +173,14 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl) | |||
173 | spin_unlock(&lru_lock); | 173 | spin_unlock(&lru_lock); |
174 | } | 174 | } |
175 | 175 | ||
176 | static void __gfs2_glock_remove_from_lru(struct gfs2_glock *gl) | 176 | static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) |
177 | { | 177 | { |
178 | spin_lock(&lru_lock); | ||
178 | if (!list_empty(&gl->gl_lru)) { | 179 | if (!list_empty(&gl->gl_lru)) { |
179 | list_del_init(&gl->gl_lru); | 180 | list_del_init(&gl->gl_lru); |
180 | atomic_dec(&lru_count); | 181 | atomic_dec(&lru_count); |
181 | clear_bit(GLF_LRU, &gl->gl_flags); | 182 | clear_bit(GLF_LRU, &gl->gl_flags); |
182 | } | 183 | } |
183 | } | ||
184 | |||
185 | static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) | ||
186 | { | ||
187 | spin_lock(&lru_lock); | ||
188 | __gfs2_glock_remove_from_lru(gl); | ||
189 | spin_unlock(&lru_lock); | 184 | spin_unlock(&lru_lock); |
190 | } | 185 | } |
191 | 186 | ||
@@ -205,9 +200,7 @@ void gfs2_glock_put(struct gfs2_glock *gl) | |||
205 | 200 | ||
206 | lockref_mark_dead(&gl->gl_lockref); | 201 | lockref_mark_dead(&gl->gl_lockref); |
207 | 202 | ||
208 | spin_lock(&lru_lock); | 203 | gfs2_glock_remove_from_lru(gl); |
209 | __gfs2_glock_remove_from_lru(gl); | ||
210 | spin_unlock(&lru_lock); | ||
211 | spin_unlock(&gl->gl_lockref.lock); | 204 | spin_unlock(&gl->gl_lockref.lock); |
212 | spin_lock_bucket(gl->gl_hash); | 205 | spin_lock_bucket(gl->gl_hash); |
213 | hlist_bl_del_rcu(&gl->gl_list); | 206 | hlist_bl_del_rcu(&gl->gl_list); |
@@ -775,7 +768,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
775 | mapping->flags = 0; | 768 | mapping->flags = 0; |
776 | mapping_set_gfp_mask(mapping, GFP_NOFS); | 769 | mapping_set_gfp_mask(mapping, GFP_NOFS); |
777 | mapping->private_data = NULL; | 770 | mapping->private_data = NULL; |
778 | mapping->backing_dev_info = s->s_bdi; | ||
779 | mapping->writeback_index = 0; | 771 | mapping->writeback_index = 0; |
780 | } | 772 | } |
781 | 773 | ||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 9054002ebe70..73c72253faac 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -543,10 +543,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | |||
543 | } | 543 | } |
544 | 544 | ||
545 | error = gfs2_dir_add(&dip->i_inode, name, ip, da); | 545 | error = gfs2_dir_add(&dip->i_inode, name, ip, da); |
546 | if (error) | ||
547 | goto fail_end_trans; | ||
548 | 546 | ||
549 | fail_end_trans: | ||
550 | gfs2_trans_end(sdp); | 547 | gfs2_trans_end(sdp); |
551 | fail_ipreserv: | 548 | fail_ipreserv: |
552 | gfs2_inplace_release(dip); | 549 | gfs2_inplace_release(dip); |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 8633ad328ee2..efc8e254787c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -112,7 +112,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
112 | mapping->flags = 0; | 112 | mapping->flags = 0; |
113 | mapping_set_gfp_mask(mapping, GFP_NOFS); | 113 | mapping_set_gfp_mask(mapping, GFP_NOFS); |
114 | mapping->private_data = NULL; | 114 | mapping->private_data = NULL; |
115 | mapping->backing_dev_info = sb->s_bdi; | ||
116 | mapping->writeback_index = 0; | 115 | mapping->writeback_index = 0; |
117 | 116 | ||
118 | spin_lock_init(&sdp->sd_log_lock); | 117 | spin_lock_init(&sdp->sd_log_lock); |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 3e193cb36996..3aa17d4d1cfc 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -145,7 +145,8 @@ static void gfs2_qd_dispose(struct list_head *list) | |||
145 | } | 145 | } |
146 | 146 | ||
147 | 147 | ||
148 | static enum lru_status gfs2_qd_isolate(struct list_head *item, spinlock_t *lock, void *arg) | 148 | static enum lru_status gfs2_qd_isolate(struct list_head *item, |
149 | struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) | ||
149 | { | 150 | { |
150 | struct list_head *dispose = arg; | 151 | struct list_head *dispose = arg; |
151 | struct gfs2_quota_data *qd = list_entry(item, struct gfs2_quota_data, qd_lru); | 152 | struct gfs2_quota_data *qd = list_entry(item, struct gfs2_quota_data, qd_lru); |
@@ -155,7 +156,7 @@ static enum lru_status gfs2_qd_isolate(struct list_head *item, spinlock_t *lock, | |||
155 | 156 | ||
156 | if (qd->qd_lockref.count == 0) { | 157 | if (qd->qd_lockref.count == 0) { |
157 | lockref_mark_dead(&qd->qd_lockref); | 158 | lockref_mark_dead(&qd->qd_lockref); |
158 | list_move(&qd->qd_lru, dispose); | 159 | list_lru_isolate_move(lru, &qd->qd_lru, dispose); |
159 | } | 160 | } |
160 | 161 | ||
161 | spin_unlock(&qd->qd_lockref.lock); | 162 | spin_unlock(&qd->qd_lockref.lock); |
@@ -171,8 +172,8 @@ static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, | |||
171 | if (!(sc->gfp_mask & __GFP_FS)) | 172 | if (!(sc->gfp_mask & __GFP_FS)) |
172 | return SHRINK_STOP; | 173 | return SHRINK_STOP; |
173 | 174 | ||
174 | freed = list_lru_walk_node(&gfs2_qd_lru, sc->nid, gfs2_qd_isolate, | 175 | freed = list_lru_shrink_walk(&gfs2_qd_lru, sc, |
175 | &dispose, &sc->nr_to_scan); | 176 | gfs2_qd_isolate, &dispose); |
176 | 177 | ||
177 | gfs2_qd_dispose(&dispose); | 178 | gfs2_qd_dispose(&dispose); |
178 | 179 | ||
@@ -182,7 +183,7 @@ static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, | |||
182 | static unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, | 183 | static unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, |
183 | struct shrink_control *sc) | 184 | struct shrink_control *sc) |
184 | { | 185 | { |
185 | return vfs_pressure_ratio(list_lru_count_node(&gfs2_qd_lru, sc->nid)); | 186 | return vfs_pressure_ratio(list_lru_shrink_count(&gfs2_qd_lru, sc)); |
186 | } | 187 | } |
187 | 188 | ||
188 | struct shrinker gfs2_qd_shrinker = { | 189 | struct shrinker gfs2_qd_shrinker = { |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 573bd3b758fa..1b645773c98e 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
@@ -439,7 +439,7 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, | |||
439 | 439 | ||
440 | ls->ls_recover_jid_done = jid; | 440 | ls->ls_recover_jid_done = jid; |
441 | ls->ls_recover_jid_status = message; | 441 | ls->ls_recover_jid_status = message; |
442 | sprintf(env_jid, "JID=%d", jid); | 442 | sprintf(env_jid, "JID=%u", jid); |
443 | sprintf(env_status, "RECOVERY=%s", | 443 | sprintf(env_status, "RECOVERY=%s", |
444 | message == LM_RD_SUCCESS ? "Done" : "Failed"); | 444 | message == LM_RD_SUCCESS ? "Done" : "Failed"); |
445 | kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); | 445 | kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 5b327f837de7..1666382b198d 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -743,7 +743,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
743 | struct gfs2_inode *ip = GFS2_I(inode); | 743 | struct gfs2_inode *ip = GFS2_I(inode); |
744 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 744 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
745 | struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); | 745 | struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); |
746 | struct backing_dev_info *bdi = metamapping->backing_dev_info; | 746 | struct backing_dev_info *bdi = inode_to_bdi(metamapping->host); |
747 | int ret = 0; | 747 | int ret = 0; |
748 | 748 | ||
749 | if (wbc->sync_mode == WB_SYNC_ALL) | 749 | if (wbc->sync_mode == WB_SYNC_ALL) |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 3ab566ba5696..ae8e8811f0e8 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -96,7 +96,7 @@ static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) | |||
96 | struct super_block *sb = sdp->sd_vfs; | 96 | struct super_block *sb = sdp->sd_vfs; |
97 | int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 0 : 1; | 97 | int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 0 : 1; |
98 | 98 | ||
99 | return snprintf(buf, PAGE_SIZE, "%u\n", frozen); | 99 | return snprintf(buf, PAGE_SIZE, "%d\n", frozen); |
100 | } | 100 | } |
101 | 101 | ||
102 | static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | 102 | static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 5eba47f593f8..c274aca8e8dc 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -62,12 +62,6 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) | |||
62 | return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); | 62 | return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); |
63 | } | 63 | } |
64 | 64 | ||
65 | static struct backing_dev_info hugetlbfs_backing_dev_info = { | ||
66 | .name = "hugetlbfs", | ||
67 | .ra_pages = 0, /* No readahead */ | ||
68 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | ||
69 | }; | ||
70 | |||
71 | int sysctl_hugetlb_shm_group; | 65 | int sysctl_hugetlb_shm_group; |
72 | 66 | ||
73 | enum { | 67 | enum { |
@@ -498,7 +492,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, | |||
498 | lockdep_set_class(&inode->i_mapping->i_mmap_rwsem, | 492 | lockdep_set_class(&inode->i_mapping->i_mmap_rwsem, |
499 | &hugetlbfs_i_mmap_rwsem_key); | 493 | &hugetlbfs_i_mmap_rwsem_key); |
500 | inode->i_mapping->a_ops = &hugetlbfs_aops; | 494 | inode->i_mapping->a_ops = &hugetlbfs_aops; |
501 | inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; | ||
502 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 495 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
503 | inode->i_mapping->private_data = resv_map; | 496 | inode->i_mapping->private_data = resv_map; |
504 | info = HUGETLBFS_I(inode); | 497 | info = HUGETLBFS_I(inode); |
@@ -1032,10 +1025,6 @@ static int __init init_hugetlbfs_fs(void) | |||
1032 | return -ENOTSUPP; | 1025 | return -ENOTSUPP; |
1033 | } | 1026 | } |
1034 | 1027 | ||
1035 | error = bdi_init(&hugetlbfs_backing_dev_info); | ||
1036 | if (error) | ||
1037 | return error; | ||
1038 | |||
1039 | error = -ENOMEM; | 1028 | error = -ENOMEM; |
1040 | hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", | 1029 | hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", |
1041 | sizeof(struct hugetlbfs_inode_info), | 1030 | sizeof(struct hugetlbfs_inode_info), |
@@ -1071,7 +1060,6 @@ static int __init init_hugetlbfs_fs(void) | |||
1071 | out: | 1060 | out: |
1072 | kmem_cache_destroy(hugetlbfs_inode_cachep); | 1061 | kmem_cache_destroy(hugetlbfs_inode_cachep); |
1073 | out2: | 1062 | out2: |
1074 | bdi_destroy(&hugetlbfs_backing_dev_info); | ||
1075 | return error; | 1063 | return error; |
1076 | } | 1064 | } |
1077 | 1065 | ||
@@ -1091,7 +1079,6 @@ static void __exit exit_hugetlbfs_fs(void) | |||
1091 | for_each_hstate(h) | 1079 | for_each_hstate(h) |
1092 | kern_unmount(hugetlbfs_vfsmount[i++]); | 1080 | kern_unmount(hugetlbfs_vfsmount[i++]); |
1093 | unregister_filesystem(&hugetlbfs_fs_type); | 1081 | unregister_filesystem(&hugetlbfs_fs_type); |
1094 | bdi_destroy(&hugetlbfs_backing_dev_info); | ||
1095 | } | 1082 | } |
1096 | 1083 | ||
1097 | module_init(init_hugetlbfs_fs) | 1084 | module_init(init_hugetlbfs_fs) |
diff --git a/fs/inode.c b/fs/inode.c index aa149e7262ac..f00b16f45507 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/buffer_head.h> /* for inode_has_buffers */ | 18 | #include <linux/buffer_head.h> /* for inode_has_buffers */ |
19 | #include <linux/ratelimit.h> | 19 | #include <linux/ratelimit.h> |
20 | #include <linux/list_lru.h> | 20 | #include <linux/list_lru.h> |
21 | #include <trace/events/writeback.h> | ||
21 | #include "internal.h" | 22 | #include "internal.h" |
22 | 23 | ||
23 | /* | 24 | /* |
@@ -30,7 +31,7 @@ | |||
30 | * inode_sb_list_lock protects: | 31 | * inode_sb_list_lock protects: |
31 | * sb->s_inodes, inode->i_sb_list | 32 | * sb->s_inodes, inode->i_sb_list |
32 | * bdi->wb.list_lock protects: | 33 | * bdi->wb.list_lock protects: |
33 | * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list | 34 | * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list |
34 | * inode_hash_lock protects: | 35 | * inode_hash_lock protects: |
35 | * inode_hashtable, inode->i_hash | 36 | * inode_hashtable, inode->i_hash |
36 | * | 37 | * |
@@ -170,20 +171,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) | |||
170 | atomic_set(&mapping->i_mmap_writable, 0); | 171 | atomic_set(&mapping->i_mmap_writable, 0); |
171 | mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); | 172 | mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); |
172 | mapping->private_data = NULL; | 173 | mapping->private_data = NULL; |
173 | mapping->backing_dev_info = &default_backing_dev_info; | ||
174 | mapping->writeback_index = 0; | 174 | mapping->writeback_index = 0; |
175 | |||
176 | /* | ||
177 | * If the block_device provides a backing_dev_info for client | ||
178 | * inodes then use that. Otherwise the inode share the bdev's | ||
179 | * backing_dev_info. | ||
180 | */ | ||
181 | if (sb->s_bdev) { | ||
182 | struct backing_dev_info *bdi; | ||
183 | |||
184 | bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; | ||
185 | mapping->backing_dev_info = bdi; | ||
186 | } | ||
187 | inode->i_private = NULL; | 175 | inode->i_private = NULL; |
188 | inode->i_mapping = mapping; | 176 | inode->i_mapping = mapping; |
189 | INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ | 177 | INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ |
@@ -194,7 +182,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) | |||
194 | #ifdef CONFIG_FSNOTIFY | 182 | #ifdef CONFIG_FSNOTIFY |
195 | inode->i_fsnotify_mask = 0; | 183 | inode->i_fsnotify_mask = 0; |
196 | #endif | 184 | #endif |
197 | 185 | inode->i_flctx = NULL; | |
198 | this_cpu_inc(nr_inodes); | 186 | this_cpu_inc(nr_inodes); |
199 | 187 | ||
200 | return 0; | 188 | return 0; |
@@ -237,6 +225,7 @@ void __destroy_inode(struct inode *inode) | |||
237 | BUG_ON(inode_has_buffers(inode)); | 225 | BUG_ON(inode_has_buffers(inode)); |
238 | security_inode_free(inode); | 226 | security_inode_free(inode); |
239 | fsnotify_inode_delete(inode); | 227 | fsnotify_inode_delete(inode); |
228 | locks_free_lock_context(inode->i_flctx); | ||
240 | if (!inode->i_nlink) { | 229 | if (!inode->i_nlink) { |
241 | WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0); | 230 | WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0); |
242 | atomic_long_dec(&inode->i_sb->s_remove_count); | 231 | atomic_long_dec(&inode->i_sb->s_remove_count); |
@@ -355,7 +344,6 @@ void address_space_init_once(struct address_space *mapping) | |||
355 | INIT_LIST_HEAD(&mapping->private_list); | 344 | INIT_LIST_HEAD(&mapping->private_list); |
356 | spin_lock_init(&mapping->private_lock); | 345 | spin_lock_init(&mapping->private_lock); |
357 | mapping->i_mmap = RB_ROOT; | 346 | mapping->i_mmap = RB_ROOT; |
358 | INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); | ||
359 | } | 347 | } |
360 | EXPORT_SYMBOL(address_space_init_once); | 348 | EXPORT_SYMBOL(address_space_init_once); |
361 | 349 | ||
@@ -416,7 +404,8 @@ static void inode_lru_list_add(struct inode *inode) | |||
416 | */ | 404 | */ |
417 | void inode_add_lru(struct inode *inode) | 405 | void inode_add_lru(struct inode *inode) |
418 | { | 406 | { |
419 | if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) && | 407 | if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC | |
408 | I_FREEING | I_WILL_FREE)) && | ||
420 | !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE) | 409 | !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE) |
421 | inode_lru_list_add(inode); | 410 | inode_lru_list_add(inode); |
422 | } | 411 | } |
@@ -647,7 +636,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) | |||
647 | spin_unlock(&inode->i_lock); | 636 | spin_unlock(&inode->i_lock); |
648 | continue; | 637 | continue; |
649 | } | 638 | } |
650 | if (inode->i_state & I_DIRTY && !kill_dirty) { | 639 | if (inode->i_state & I_DIRTY_ALL && !kill_dirty) { |
651 | spin_unlock(&inode->i_lock); | 640 | spin_unlock(&inode->i_lock); |
652 | busy = 1; | 641 | busy = 1; |
653 | continue; | 642 | continue; |
@@ -685,8 +674,8 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) | |||
685 | * LRU does not have strict ordering. Hence we don't want to reclaim inodes | 674 | * LRU does not have strict ordering. Hence we don't want to reclaim inodes |
686 | * with this flag set because they are the inodes that are out of order. | 675 | * with this flag set because they are the inodes that are out of order. |
687 | */ | 676 | */ |
688 | static enum lru_status | 677 | static enum lru_status inode_lru_isolate(struct list_head *item, |
689 | inode_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) | 678 | struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) |
690 | { | 679 | { |
691 | struct list_head *freeable = arg; | 680 | struct list_head *freeable = arg; |
692 | struct inode *inode = container_of(item, struct inode, i_lru); | 681 | struct inode *inode = container_of(item, struct inode, i_lru); |
@@ -704,7 +693,7 @@ inode_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) | |||
704 | */ | 693 | */ |
705 | if (atomic_read(&inode->i_count) || | 694 | if (atomic_read(&inode->i_count) || |
706 | (inode->i_state & ~I_REFERENCED)) { | 695 | (inode->i_state & ~I_REFERENCED)) { |
707 | list_del_init(&inode->i_lru); | 696 | list_lru_isolate(lru, &inode->i_lru); |
708 | spin_unlock(&inode->i_lock); | 697 | spin_unlock(&inode->i_lock); |
709 | this_cpu_dec(nr_unused); | 698 | this_cpu_dec(nr_unused); |
710 | return LRU_REMOVED; | 699 | return LRU_REMOVED; |
@@ -738,7 +727,7 @@ inode_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) | |||
738 | 727 | ||
739 | WARN_ON(inode->i_state & I_NEW); | 728 | WARN_ON(inode->i_state & I_NEW); |
740 | inode->i_state |= I_FREEING; | 729 | inode->i_state |= I_FREEING; |
741 | list_move(&inode->i_lru, freeable); | 730 | list_lru_isolate_move(lru, &inode->i_lru, freeable); |
742 | spin_unlock(&inode->i_lock); | 731 | spin_unlock(&inode->i_lock); |
743 | 732 | ||
744 | this_cpu_dec(nr_unused); | 733 | this_cpu_dec(nr_unused); |
@@ -751,14 +740,13 @@ inode_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) | |||
751 | * to trim from the LRU. Inodes to be freed are moved to a temporary list and | 740 | * to trim from the LRU. Inodes to be freed are moved to a temporary list and |
752 | * then are freed outside inode_lock by dispose_list(). | 741 | * then are freed outside inode_lock by dispose_list(). |
753 | */ | 742 | */ |
754 | long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan, | 743 | long prune_icache_sb(struct super_block *sb, struct shrink_control *sc) |
755 | int nid) | ||
756 | { | 744 | { |
757 | LIST_HEAD(freeable); | 745 | LIST_HEAD(freeable); |
758 | long freed; | 746 | long freed; |
759 | 747 | ||
760 | freed = list_lru_walk_node(&sb->s_inode_lru, nid, inode_lru_isolate, | 748 | freed = list_lru_shrink_walk(&sb->s_inode_lru, sc, |
761 | &freeable, &nr_to_scan); | 749 | inode_lru_isolate, &freeable); |
762 | dispose_list(&freeable); | 750 | dispose_list(&freeable); |
763 | return freed; | 751 | return freed; |
764 | } | 752 | } |
@@ -1282,6 +1270,56 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino) | |||
1282 | } | 1270 | } |
1283 | EXPORT_SYMBOL(ilookup); | 1271 | EXPORT_SYMBOL(ilookup); |
1284 | 1272 | ||
1273 | /** | ||
1274 | * find_inode_nowait - find an inode in the inode cache | ||
1275 | * @sb: super block of file system to search | ||
1276 | * @hashval: hash value (usually inode number) to search for | ||
1277 | * @match: callback used for comparisons between inodes | ||
1278 | * @data: opaque data pointer to pass to @match | ||
1279 | * | ||
1280 | * Search for the inode specified by @hashval and @data in the inode | ||
1281 | * cache, where the helper function @match will return 0 if the inode | ||
1282 | * does not match, 1 if the inode does match, and -1 if the search | ||
1283 | * should be stopped. The @match function must be responsible for | ||
1284 | * taking the i_lock spin_lock and checking i_state for an inode being | ||
1285 | * freed or being initialized, and incrementing the reference count | ||
1286 | * before returning 1. It also must not sleep, since it is called with | ||
1287 | * the inode_hash_lock spinlock held. | ||
1288 | * | ||
1289 | * This is a even more generalized version of ilookup5() when the | ||
1290 | * function must never block --- find_inode() can block in | ||
1291 | * __wait_on_freeing_inode() --- or when the caller can not increment | ||
1292 | * the reference count because the resulting iput() might cause an | ||
1293 | * inode eviction. The tradeoff is that the @match funtion must be | ||
1294 | * very carefully implemented. | ||
1295 | */ | ||
1296 | struct inode *find_inode_nowait(struct super_block *sb, | ||
1297 | unsigned long hashval, | ||
1298 | int (*match)(struct inode *, unsigned long, | ||
1299 | void *), | ||
1300 | void *data) | ||
1301 | { | ||
1302 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); | ||
1303 | struct inode *inode, *ret_inode = NULL; | ||
1304 | int mval; | ||
1305 | |||
1306 | spin_lock(&inode_hash_lock); | ||
1307 | hlist_for_each_entry(inode, head, i_hash) { | ||
1308 | if (inode->i_sb != sb) | ||
1309 | continue; | ||
1310 | mval = match(inode, hashval, data); | ||
1311 | if (mval == 0) | ||
1312 | continue; | ||
1313 | if (mval == 1) | ||
1314 | ret_inode = inode; | ||
1315 | goto out; | ||
1316 | } | ||
1317 | out: | ||
1318 | spin_unlock(&inode_hash_lock); | ||
1319 | return ret_inode; | ||
1320 | } | ||
1321 | EXPORT_SYMBOL(find_inode_nowait); | ||
1322 | |||
1285 | int insert_inode_locked(struct inode *inode) | 1323 | int insert_inode_locked(struct inode *inode) |
1286 | { | 1324 | { |
1287 | struct super_block *sb = inode->i_sb; | 1325 | struct super_block *sb = inode->i_sb; |
@@ -1432,11 +1470,20 @@ static void iput_final(struct inode *inode) | |||
1432 | */ | 1470 | */ |
1433 | void iput(struct inode *inode) | 1471 | void iput(struct inode *inode) |
1434 | { | 1472 | { |
1435 | if (inode) { | 1473 | if (!inode) |
1436 | BUG_ON(inode->i_state & I_CLEAR); | 1474 | return; |
1437 | 1475 | BUG_ON(inode->i_state & I_CLEAR); | |
1438 | if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) | 1476 | retry: |
1439 | iput_final(inode); | 1477 | if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) { |
1478 | if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) { | ||
1479 | atomic_inc(&inode->i_count); | ||
1480 | inode->i_state &= ~I_DIRTY_TIME; | ||
1481 | spin_unlock(&inode->i_lock); | ||
1482 | trace_writeback_lazytime_iput(inode); | ||
1483 | mark_inode_dirty_sync(inode); | ||
1484 | goto retry; | ||
1485 | } | ||
1486 | iput_final(inode); | ||
1440 | } | 1487 | } |
1441 | } | 1488 | } |
1442 | EXPORT_SYMBOL(iput); | 1489 | EXPORT_SYMBOL(iput); |
@@ -1495,14 +1542,9 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, | |||
1495 | return 0; | 1542 | return 0; |
1496 | } | 1543 | } |
1497 | 1544 | ||
1498 | /* | 1545 | int generic_update_time(struct inode *inode, struct timespec *time, int flags) |
1499 | * This does the actual work of updating an inodes time or version. Must have | ||
1500 | * had called mnt_want_write() before calling this. | ||
1501 | */ | ||
1502 | static int update_time(struct inode *inode, struct timespec *time, int flags) | ||
1503 | { | 1546 | { |
1504 | if (inode->i_op->update_time) | 1547 | int iflags = I_DIRTY_TIME; |
1505 | return inode->i_op->update_time(inode, time, flags); | ||
1506 | 1548 | ||
1507 | if (flags & S_ATIME) | 1549 | if (flags & S_ATIME) |
1508 | inode->i_atime = *time; | 1550 | inode->i_atime = *time; |
@@ -1512,9 +1554,27 @@ static int update_time(struct inode *inode, struct timespec *time, int flags) | |||
1512 | inode->i_ctime = *time; | 1554 | inode->i_ctime = *time; |
1513 | if (flags & S_MTIME) | 1555 | if (flags & S_MTIME) |
1514 | inode->i_mtime = *time; | 1556 | inode->i_mtime = *time; |
1515 | mark_inode_dirty_sync(inode); | 1557 | |
1558 | if (!(inode->i_sb->s_flags & MS_LAZYTIME) || (flags & S_VERSION)) | ||
1559 | iflags |= I_DIRTY_SYNC; | ||
1560 | __mark_inode_dirty(inode, iflags); | ||
1516 | return 0; | 1561 | return 0; |
1517 | } | 1562 | } |
1563 | EXPORT_SYMBOL(generic_update_time); | ||
1564 | |||
1565 | /* | ||
1566 | * This does the actual work of updating an inodes time or version. Must have | ||
1567 | * had called mnt_want_write() before calling this. | ||
1568 | */ | ||
1569 | static int update_time(struct inode *inode, struct timespec *time, int flags) | ||
1570 | { | ||
1571 | int (*update_time)(struct inode *, struct timespec *, int); | ||
1572 | |||
1573 | update_time = inode->i_op->update_time ? inode->i_op->update_time : | ||
1574 | generic_update_time; | ||
1575 | |||
1576 | return update_time(inode, time, flags); | ||
1577 | } | ||
1518 | 1578 | ||
1519 | /** | 1579 | /** |
1520 | * touch_atime - update the access time | 1580 | * touch_atime - update the access time |
diff --git a/fs/internal.h b/fs/internal.h index e9a61fe67575..30459dab409d 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -14,6 +14,7 @@ struct file_system_type; | |||
14 | struct linux_binprm; | 14 | struct linux_binprm; |
15 | struct path; | 15 | struct path; |
16 | struct mount; | 16 | struct mount; |
17 | struct shrink_control; | ||
17 | 18 | ||
18 | /* | 19 | /* |
19 | * block_dev.c | 20 | * block_dev.c |
@@ -111,8 +112,7 @@ extern int open_check_o_direct(struct file *f); | |||
111 | * inode.c | 112 | * inode.c |
112 | */ | 113 | */ |
113 | extern spinlock_t inode_sb_list_lock; | 114 | extern spinlock_t inode_sb_list_lock; |
114 | extern long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan, | 115 | extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); |
115 | int nid); | ||
116 | extern void inode_add_lru(struct inode *inode); | 116 | extern void inode_add_lru(struct inode *inode); |
117 | 117 | ||
118 | /* | 118 | /* |
@@ -129,8 +129,7 @@ extern int invalidate_inodes(struct super_block *, bool); | |||
129 | */ | 129 | */ |
130 | extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); | 130 | extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); |
131 | extern int d_set_mounted(struct dentry *dentry); | 131 | extern int d_set_mounted(struct dentry *dentry); |
132 | extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, | 132 | extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc); |
133 | int nid); | ||
134 | 133 | ||
135 | /* | 134 | /* |
136 | * read_write.c | 135 | * read_write.c |
@@ -145,7 +144,7 @@ extern const struct file_operations pipefifo_fops; | |||
145 | /* | 144 | /* |
146 | * fs_pin.c | 145 | * fs_pin.c |
147 | */ | 146 | */ |
148 | extern void sb_pin_kill(struct super_block *sb); | 147 | extern void group_pin_kill(struct hlist_head *p); |
149 | extern void mnt_pin_kill(struct mount *m); | 148 | extern void mnt_pin_kill(struct mount *m); |
150 | 149 | ||
151 | /* | 150 | /* |
diff --git a/fs/ioctl.c b/fs/ioctl.c index 214c3c11fbc2..5d01d2638ca5 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -379,6 +379,11 @@ int __generic_block_fiemap(struct inode *inode, | |||
379 | past_eof = true; | 379 | past_eof = true; |
380 | } | 380 | } |
381 | cond_resched(); | 381 | cond_resched(); |
382 | if (fatal_signal_pending(current)) { | ||
383 | ret = -EINTR; | ||
384 | break; | ||
385 | } | ||
386 | |||
382 | } while (1); | 387 | } while (1); |
383 | 388 | ||
384 | /* If ret is 1 then we just hit the end of the extent array */ | 389 | /* If ret is 1 then we just hit the end of the extent array */ |
diff --git a/fs/isofs/util.c b/fs/isofs/util.c index 01e1ee7a998b..005a15cfd30a 100644 --- a/fs/isofs/util.c +++ b/fs/isofs/util.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * linux/fs/isofs/util.c | 2 | * linux/fs/isofs/util.c |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #include <linux/time.h> | ||
5 | #include "isofs.h" | 6 | #include "isofs.h" |
6 | 7 | ||
7 | /* | 8 | /* |
@@ -17,9 +18,9 @@ | |||
17 | int iso_date(char * p, int flag) | 18 | int iso_date(char * p, int flag) |
18 | { | 19 | { |
19 | int year, month, day, hour, minute, second, tz; | 20 | int year, month, day, hour, minute, second, tz; |
20 | int crtime, days, i; | 21 | int crtime; |
21 | 22 | ||
22 | year = p[0] - 70; | 23 | year = p[0]; |
23 | month = p[1]; | 24 | month = p[1]; |
24 | day = p[2]; | 25 | day = p[2]; |
25 | hour = p[3]; | 26 | hour = p[3]; |
@@ -31,18 +32,7 @@ int iso_date(char * p, int flag) | |||
31 | if (year < 0) { | 32 | if (year < 0) { |
32 | crtime = 0; | 33 | crtime = 0; |
33 | } else { | 34 | } else { |
34 | int monlen[12] = {31,28,31,30,31,30,31,31,30,31,30,31}; | 35 | crtime = mktime64(year+1900, month, day, hour, minute, second); |
35 | |||
36 | days = year * 365; | ||
37 | if (year > 2) | ||
38 | days += (year+1) / 4; | ||
39 | for (i = 1; i < month; i++) | ||
40 | days += monlen[i-1]; | ||
41 | if (((year+2) % 4) == 0 && month > 2) | ||
42 | days++; | ||
43 | days += day - 1; | ||
44 | crtime = ((((days * 24) + hour) * 60 + minute) * 60) | ||
45 | + second; | ||
46 | 36 | ||
47 | /* sign extend */ | 37 | /* sign extend */ |
48 | if (tz & 0x80) | 38 | if (tz & 0x80) |
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c index 92e0644bf867..556de100ebd5 100644 --- a/fs/jffs2/compr_rubin.c +++ b/fs/jffs2/compr_rubin.c | |||
@@ -84,11 +84,6 @@ static inline int pullbit(struct pushpull *pp) | |||
84 | return bit; | 84 | return bit; |
85 | } | 85 | } |
86 | 86 | ||
87 | static inline int pulledbits(struct pushpull *pp) | ||
88 | { | ||
89 | return pp->ofs; | ||
90 | } | ||
91 | |||
92 | 87 | ||
93 | static void init_rubin(struct rubin_state *rs, int div, int *bits) | 88 | static void init_rubin(struct rubin_state *rs, int div, int *bits) |
94 | { | 89 | { |
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 7654e87b0428..9ad5ba4b299b 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c | |||
@@ -510,6 +510,10 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo | |||
510 | sumlen = c->sector_size - je32_to_cpu(sm->offset); | 510 | sumlen = c->sector_size - je32_to_cpu(sm->offset); |
511 | sumptr = buf + buf_size - sumlen; | 511 | sumptr = buf + buf_size - sumlen; |
512 | 512 | ||
513 | /* sm->offset maybe wrong but MAGIC maybe right */ | ||
514 | if (sumlen > c->sector_size) | ||
515 | goto full_scan; | ||
516 | |||
513 | /* Now, make sure the summary itself is available */ | 517 | /* Now, make sure the summary itself is available */ |
514 | if (sumlen > buf_size) { | 518 | if (sumlen > buf_size) { |
515 | /* Need to kmalloc for this. */ | 519 | /* Need to kmalloc for this. */ |
@@ -544,6 +548,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo | |||
544 | } | 548 | } |
545 | } | 549 | } |
546 | 550 | ||
551 | full_scan: | ||
547 | buf_ofs = jeb->offset; | 552 | buf_ofs = jeb->offset; |
548 | 553 | ||
549 | if (!buf_size) { | 554 | if (!buf_size) { |
diff --git a/fs/jfs/endian24.h b/fs/jfs/endian24.h deleted file mode 100644 index fa92f7f1d0d0..000000000000 --- a/fs/jfs/endian24.h +++ /dev/null | |||
@@ -1,49 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) International Business Machines Corp., 2001 | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
12 | * the GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | #ifndef _H_ENDIAN24 | ||
19 | #define _H_ENDIAN24 | ||
20 | |||
21 | /* | ||
22 | * endian24.h: | ||
23 | * | ||
24 | * Endian conversion for 24-byte data | ||
25 | * | ||
26 | */ | ||
27 | #define __swab24(x) \ | ||
28 | ({ \ | ||
29 | __u32 __x = (x); \ | ||
30 | ((__u32)( \ | ||
31 | ((__x & (__u32)0x000000ffUL) << 16) | \ | ||
32 | (__x & (__u32)0x0000ff00UL) | \ | ||
33 | ((__x & (__u32)0x00ff0000UL) >> 16) )); \ | ||
34 | }) | ||
35 | |||
36 | #if (defined(__KERNEL__) && defined(__LITTLE_ENDIAN)) || (defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN)) | ||
37 | #define __cpu_to_le24(x) ((__u32)(x)) | ||
38 | #define __le24_to_cpu(x) ((__u32)(x)) | ||
39 | #else | ||
40 | #define __cpu_to_le24(x) __swab24(x) | ||
41 | #define __le24_to_cpu(x) __swab24(x) | ||
42 | #endif | ||
43 | |||
44 | #ifdef __KERNEL__ | ||
45 | #define cpu_to_le24 __cpu_to_le24 | ||
46 | #define le24_to_cpu __le24_to_cpu | ||
47 | #endif | ||
48 | |||
49 | #endif /* !_H_ENDIAN24 */ | ||
diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 33aa0cc1f8b8..10815f8dfd8b 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c | |||
@@ -39,7 +39,7 @@ int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
39 | return rc; | 39 | return rc; |
40 | 40 | ||
41 | mutex_lock(&inode->i_mutex); | 41 | mutex_lock(&inode->i_mutex); |
42 | if (!(inode->i_state & I_DIRTY) || | 42 | if (!(inode->i_state & I_DIRTY_ALL) || |
43 | (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { | 43 | (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { |
44 | /* Make sure committed changes hit the disk */ | 44 | /* Make sure committed changes hit the disk */ |
45 | jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); | 45 | jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); |
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 984c2bbf4f61..d88576e23fe4 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c | |||
@@ -1040,8 +1040,8 @@ static int dtSplitUp(tid_t tid, | |||
1040 | pxdlist.maxnpxd = 1; | 1040 | pxdlist.maxnpxd = 1; |
1041 | pxdlist.npxd = 0; | 1041 | pxdlist.npxd = 0; |
1042 | pxd = &pxdlist.pxd[0]; | 1042 | pxd = &pxdlist.pxd[0]; |
1043 | PXDaddress(pxd, nxaddr) | 1043 | PXDaddress(pxd, nxaddr); |
1044 | PXDlength(pxd, xlen + n); | 1044 | PXDlength(pxd, xlen + n); |
1045 | split->pxdlist = &pxdlist; | 1045 | split->pxdlist = &pxdlist; |
1046 | if ((rc = dtExtendPage(tid, ip, split, btstack))) { | 1046 | if ((rc = dtExtendPage(tid, ip, split, btstack))) { |
1047 | nxaddr = addressPXD(pxd); | 1047 | nxaddr = addressPXD(pxd); |
diff --git a/fs/jfs/jfs_types.h b/fs/jfs/jfs_types.h index 43ea3713c083..8f602dcb51fa 100644 --- a/fs/jfs/jfs_types.h +++ b/fs/jfs/jfs_types.h | |||
@@ -30,8 +30,6 @@ | |||
30 | #include <linux/types.h> | 30 | #include <linux/types.h> |
31 | #include <linux/nls.h> | 31 | #include <linux/nls.h> |
32 | 32 | ||
33 | #include "endian24.h" | ||
34 | |||
35 | /* | 33 | /* |
36 | * transaction and lock id's | 34 | * transaction and lock id's |
37 | * | 35 | * |
@@ -59,26 +57,42 @@ struct timestruc_t { | |||
59 | 57 | ||
60 | /* | 58 | /* |
61 | * physical xd (pxd) | 59 | * physical xd (pxd) |
60 | * | ||
61 | * The leftmost 24 bits of len_addr are the extent length. | ||
62 | * The rightmost 8 bits of len_addr are the most signficant bits of | ||
63 | * the extent address | ||
62 | */ | 64 | */ |
63 | typedef struct { | 65 | typedef struct { |
64 | unsigned len:24; | 66 | __le32 len_addr; |
65 | unsigned addr1:8; | ||
66 | __le32 addr2; | 67 | __le32 addr2; |
67 | } pxd_t; | 68 | } pxd_t; |
68 | 69 | ||
69 | /* xd_t field construction */ | 70 | /* xd_t field construction */ |
70 | 71 | ||
71 | #define PXDlength(pxd, length32) ((pxd)->len = __cpu_to_le24(length32)) | 72 | static inline void PXDlength(pxd_t *pxd, __u32 len) |
72 | #define PXDaddress(pxd, address64)\ | 73 | { |
73 | {\ | 74 | pxd->len_addr = (pxd->len_addr & cpu_to_le32(~0xffffff)) | |
74 | (pxd)->addr1 = ((s64)address64) >> 32;\ | 75 | cpu_to_le32(len & 0xffffff); |
75 | (pxd)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ | 76 | } |
77 | |||
78 | static inline void PXDaddress(pxd_t *pxd, __u64 addr) | ||
79 | { | ||
80 | pxd->len_addr = (pxd->len_addr & cpu_to_le32(0xffffff)) | | ||
81 | cpu_to_le32((addr >> 32)<<24); | ||
82 | pxd->addr2 = cpu_to_le32(addr & 0xffffffff); | ||
76 | } | 83 | } |
77 | 84 | ||
78 | /* xd_t field extraction */ | 85 | /* xd_t field extraction */ |
79 | #define lengthPXD(pxd) __le24_to_cpu((pxd)->len) | 86 | static inline __u32 lengthPXD(pxd_t *pxd) |
80 | #define addressPXD(pxd)\ | 87 | { |
81 | ( ((s64)((pxd)->addr1)) << 32 | __le32_to_cpu((pxd)->addr2)) | 88 | return le32_to_cpu((pxd)->len_addr) & 0xffffff; |
89 | } | ||
90 | |||
91 | static inline __u64 addressPXD(pxd_t *pxd) | ||
92 | { | ||
93 | __u64 n = le32_to_cpu(pxd->len_addr) & ~0xffffff; | ||
94 | return (n << 8) + le32_to_cpu(pxd->addr2); | ||
95 | } | ||
82 | 96 | ||
83 | #define MAXTREEHEIGHT 8 | 97 | #define MAXTREEHEIGHT 8 |
84 | /* pxd list */ | 98 | /* pxd list */ |
@@ -93,12 +107,10 @@ struct pxdlist { | |||
93 | * data extent descriptor (dxd) | 107 | * data extent descriptor (dxd) |
94 | */ | 108 | */ |
95 | typedef struct { | 109 | typedef struct { |
96 | unsigned flag:8; /* 1: flags */ | 110 | __u8 flag; /* 1: flags */ |
97 | unsigned rsrvd:24; | 111 | __u8 rsrvd[3]; |
98 | __le32 size; /* 4: size in byte */ | 112 | __le32 size; /* 4: size in byte */ |
99 | unsigned len:24; /* 3: length in unit of fsblksize */ | 113 | pxd_t loc; /* 8: address and length in unit of fsblksize */ |
100 | unsigned addr1:8; /* 1: address in unit of fsblksize */ | ||
101 | __le32 addr2; /* 4: address in unit of fsblksize */ | ||
102 | } dxd_t; /* - 16 - */ | 114 | } dxd_t; /* - 16 - */ |
103 | 115 | ||
104 | /* dxd_t flags */ | 116 | /* dxd_t flags */ |
@@ -109,12 +121,11 @@ typedef struct { | |||
109 | #define DXD_CORRUPT 0x08 /* Inconsistency detected */ | 121 | #define DXD_CORRUPT 0x08 /* Inconsistency detected */ |
110 | 122 | ||
111 | /* dxd_t field construction | 123 | /* dxd_t field construction |
112 | * Conveniently, the PXD macros work for DXD | ||
113 | */ | 124 | */ |
114 | #define DXDlength PXDlength | 125 | #define DXDlength(dxd, len) PXDlength(&(dxd)->loc, len) |
115 | #define DXDaddress PXDaddress | 126 | #define DXDaddress(dxd, addr) PXDaddress(&(dxd)->loc, addr) |
116 | #define lengthDXD lengthPXD | 127 | #define lengthDXD(dxd) lengthPXD(&(dxd)->loc) |
117 | #define addressDXD addressPXD | 128 | #define addressDXD(dxd) addressPXD(&(dxd)->loc) |
118 | #define DXDsize(dxd, size32) ((dxd)->size = cpu_to_le32(size32)) | 129 | #define DXDsize(dxd, size32) ((dxd)->size = cpu_to_le32(size32)) |
119 | #define sizeDXD(dxd) le32_to_cpu((dxd)->size) | 130 | #define sizeDXD(dxd) le32_to_cpu((dxd)->size) |
120 | 131 | ||
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h index 08c0c749b986..1e0987986d5f 100644 --- a/fs/jfs/jfs_xtree.h +++ b/fs/jfs/jfs_xtree.h | |||
@@ -29,13 +29,11 @@ | |||
29 | * extent allocation descriptor (xad) | 29 | * extent allocation descriptor (xad) |
30 | */ | 30 | */ |
31 | typedef struct xad { | 31 | typedef struct xad { |
32 | unsigned flag:8; /* 1: flag */ | 32 | __u8 flag; /* 1: flag */ |
33 | unsigned rsvrd:16; /* 2: reserved */ | 33 | __u8 rsvrd[2]; /* 2: reserved */ |
34 | unsigned off1:8; /* 1: offset in unit of fsblksize */ | 34 | __u8 off1; /* 1: offset in unit of fsblksize */ |
35 | __le32 off2; /* 4: offset in unit of fsblksize */ | 35 | __le32 off2; /* 4: offset in unit of fsblksize */ |
36 | unsigned len:24; /* 3: length in unit of fsblksize */ | 36 | pxd_t loc; /* 8: length and address in unit of fsblksize */ |
37 | unsigned addr1:8; /* 1: address in unit of fsblksize */ | ||
38 | __le32 addr2; /* 4: address in unit of fsblksize */ | ||
39 | } xad_t; /* (16) */ | 37 | } xad_t; /* (16) */ |
40 | 38 | ||
41 | #define MAXXLEN ((1 << 24) - 1) | 39 | #define MAXXLEN ((1 << 24) - 1) |
@@ -49,19 +47,14 @@ typedef struct xad { | |||
49 | (xad)->off1 = ((u64)offset64) >> 32;\ | 47 | (xad)->off1 = ((u64)offset64) >> 32;\ |
50 | (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\ | 48 | (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\ |
51 | } | 49 | } |
52 | #define XADaddress(xad, address64)\ | 50 | #define XADaddress(xad, address64) PXDaddress(&(xad)->loc, address64) |
53 | {\ | 51 | #define XADlength(xad, length32) PXDlength(&(xad)->loc, length32) |
54 | (xad)->addr1 = ((u64)address64) >> 32;\ | ||
55 | (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ | ||
56 | } | ||
57 | #define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32) | ||
58 | 52 | ||
59 | /* xad_t field extraction */ | 53 | /* xad_t field extraction */ |
60 | #define offsetXAD(xad)\ | 54 | #define offsetXAD(xad)\ |
61 | ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2)) | 55 | ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2)) |
62 | #define addressXAD(xad)\ | 56 | #define addressXAD(xad) addressPXD(&(xad)->loc) |
63 | ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2)) | 57 | #define lengthXAD(xad) lengthPXD(&(xad)->loc) |
64 | #define lengthXAD(xad) __le24_to_cpu((xad)->len) | ||
65 | 58 | ||
66 | /* xad list */ | 59 | /* xad list */ |
67 | struct xadlist { | 60 | struct xadlist { |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 16c3a9556634..5d30c56ae075 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -619,8 +619,7 @@ out_mount_failed: | |||
619 | iput(sbi->direct_inode); | 619 | iput(sbi->direct_inode); |
620 | sbi->direct_inode = NULL; | 620 | sbi->direct_inode = NULL; |
621 | out_unload: | 621 | out_unload: |
622 | if (sbi->nls_tab) | 622 | unload_nls(sbi->nls_tab); |
623 | unload_nls(sbi->nls_tab); | ||
624 | out_kfree: | 623 | out_kfree: |
625 | kfree(sbi); | 624 | kfree(sbi); |
626 | return ret; | 625 | return ret; |
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 2d881b381d2b..6acc9648f986 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c | |||
@@ -411,8 +411,9 @@ void kernfs_put(struct kernfs_node *kn) | |||
411 | 411 | ||
412 | if (kernfs_type(kn) == KERNFS_LINK) | 412 | if (kernfs_type(kn) == KERNFS_LINK) |
413 | kernfs_put(kn->symlink.target_kn); | 413 | kernfs_put(kn->symlink.target_kn); |
414 | if (!(kn->flags & KERNFS_STATIC_NAME)) | 414 | |
415 | kfree(kn->name); | 415 | kfree_const(kn->name); |
416 | |||
416 | if (kn->iattr) { | 417 | if (kn->iattr) { |
417 | if (kn->iattr->ia_secdata) | 418 | if (kn->iattr->ia_secdata) |
418 | security_release_secctx(kn->iattr->ia_secdata, | 419 | security_release_secctx(kn->iattr->ia_secdata, |
@@ -506,15 +507,12 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, | |||
506 | const char *name, umode_t mode, | 507 | const char *name, umode_t mode, |
507 | unsigned flags) | 508 | unsigned flags) |
508 | { | 509 | { |
509 | char *dup_name = NULL; | ||
510 | struct kernfs_node *kn; | 510 | struct kernfs_node *kn; |
511 | int ret; | 511 | int ret; |
512 | 512 | ||
513 | if (!(flags & KERNFS_STATIC_NAME)) { | 513 | name = kstrdup_const(name, GFP_KERNEL); |
514 | name = dup_name = kstrdup(name, GFP_KERNEL); | 514 | if (!name) |
515 | if (!name) | 515 | return NULL; |
516 | return NULL; | ||
517 | } | ||
518 | 516 | ||
519 | kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL); | 517 | kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL); |
520 | if (!kn) | 518 | if (!kn) |
@@ -538,7 +536,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, | |||
538 | err_out2: | 536 | err_out2: |
539 | kmem_cache_free(kernfs_node_cache, kn); | 537 | kmem_cache_free(kernfs_node_cache, kn); |
540 | err_out1: | 538 | err_out1: |
541 | kfree(dup_name); | 539 | kfree_const(name); |
542 | return NULL; | 540 | return NULL; |
543 | } | 541 | } |
544 | 542 | ||
@@ -1264,7 +1262,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, | |||
1264 | /* rename kernfs_node */ | 1262 | /* rename kernfs_node */ |
1265 | if (strcmp(kn->name, new_name) != 0) { | 1263 | if (strcmp(kn->name, new_name) != 0) { |
1266 | error = -ENOMEM; | 1264 | error = -ENOMEM; |
1267 | new_name = kstrdup(new_name, GFP_KERNEL); | 1265 | new_name = kstrdup_const(new_name, GFP_KERNEL); |
1268 | if (!new_name) | 1266 | if (!new_name) |
1269 | goto out; | 1267 | goto out; |
1270 | } else { | 1268 | } else { |
@@ -1285,9 +1283,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, | |||
1285 | 1283 | ||
1286 | kn->ns = new_ns; | 1284 | kn->ns = new_ns; |
1287 | if (new_name) { | 1285 | if (new_name) { |
1288 | if (!(kn->flags & KERNFS_STATIC_NAME)) | 1286 | old_name = kn->name; |
1289 | old_name = kn->name; | ||
1290 | kn->flags &= ~KERNFS_STATIC_NAME; | ||
1291 | kn->name = new_name; | 1287 | kn->name = new_name; |
1292 | } | 1288 | } |
1293 | 1289 | ||
@@ -1297,7 +1293,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, | |||
1297 | kernfs_link_sibling(kn); | 1293 | kernfs_link_sibling(kn); |
1298 | 1294 | ||
1299 | kernfs_put(old_parent); | 1295 | kernfs_put(old_parent); |
1300 | kfree(old_name); | 1296 | kfree_const(old_name); |
1301 | 1297 | ||
1302 | error = 0; | 1298 | error = 0; |
1303 | out: | 1299 | out: |
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index ddc9f9612f16..b684e8a132e6 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c | |||
@@ -901,7 +901,6 @@ const struct file_operations kernfs_file_fops = { | |||
901 | * @ops: kernfs operations for the file | 901 | * @ops: kernfs operations for the file |
902 | * @priv: private data for the file | 902 | * @priv: private data for the file |
903 | * @ns: optional namespace tag of the file | 903 | * @ns: optional namespace tag of the file |
904 | * @name_is_static: don't copy file name | ||
905 | * @key: lockdep key for the file's active_ref, %NULL to disable lockdep | 904 | * @key: lockdep key for the file's active_ref, %NULL to disable lockdep |
906 | * | 905 | * |
907 | * Returns the created node on success, ERR_PTR() value on error. | 906 | * Returns the created node on success, ERR_PTR() value on error. |
@@ -911,7 +910,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, | |||
911 | umode_t mode, loff_t size, | 910 | umode_t mode, loff_t size, |
912 | const struct kernfs_ops *ops, | 911 | const struct kernfs_ops *ops, |
913 | void *priv, const void *ns, | 912 | void *priv, const void *ns, |
914 | bool name_is_static, | ||
915 | struct lock_class_key *key) | 913 | struct lock_class_key *key) |
916 | { | 914 | { |
917 | struct kernfs_node *kn; | 915 | struct kernfs_node *kn; |
@@ -919,8 +917,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, | |||
919 | int rc; | 917 | int rc; |
920 | 918 | ||
921 | flags = KERNFS_FILE; | 919 | flags = KERNFS_FILE; |
922 | if (name_is_static) | ||
923 | flags |= KERNFS_STATIC_NAME; | ||
924 | 920 | ||
925 | kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, flags); | 921 | kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, flags); |
926 | if (!kn) | 922 | if (!kn) |
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 985217626e66..9000874a945b 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c | |||
@@ -24,12 +24,6 @@ static const struct address_space_operations kernfs_aops = { | |||
24 | .write_end = simple_write_end, | 24 | .write_end = simple_write_end, |
25 | }; | 25 | }; |
26 | 26 | ||
27 | static struct backing_dev_info kernfs_bdi = { | ||
28 | .name = "kernfs", | ||
29 | .ra_pages = 0, /* No readahead */ | ||
30 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | ||
31 | }; | ||
32 | |||
33 | static const struct inode_operations kernfs_iops = { | 27 | static const struct inode_operations kernfs_iops = { |
34 | .permission = kernfs_iop_permission, | 28 | .permission = kernfs_iop_permission, |
35 | .setattr = kernfs_iop_setattr, | 29 | .setattr = kernfs_iop_setattr, |
@@ -40,12 +34,6 @@ static const struct inode_operations kernfs_iops = { | |||
40 | .listxattr = kernfs_iop_listxattr, | 34 | .listxattr = kernfs_iop_listxattr, |
41 | }; | 35 | }; |
42 | 36 | ||
43 | void __init kernfs_inode_init(void) | ||
44 | { | ||
45 | if (bdi_init(&kernfs_bdi)) | ||
46 | panic("failed to init kernfs_bdi"); | ||
47 | } | ||
48 | |||
49 | static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn) | 37 | static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn) |
50 | { | 38 | { |
51 | static DEFINE_MUTEX(iattr_mutex); | 39 | static DEFINE_MUTEX(iattr_mutex); |
@@ -298,7 +286,6 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode) | |||
298 | kernfs_get(kn); | 286 | kernfs_get(kn); |
299 | inode->i_private = kn; | 287 | inode->i_private = kn; |
300 | inode->i_mapping->a_ops = &kernfs_aops; | 288 | inode->i_mapping->a_ops = &kernfs_aops; |
301 | inode->i_mapping->backing_dev_info = &kernfs_bdi; | ||
302 | inode->i_op = &kernfs_iops; | 289 | inode->i_op = &kernfs_iops; |
303 | 290 | ||
304 | set_default_inode_attr(inode, kn->mode); | 291 | set_default_inode_attr(inode, kn->mode); |
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index dc84a3ef9ca2..af9fa7499919 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h | |||
@@ -88,7 +88,6 @@ int kernfs_iop_removexattr(struct dentry *dentry, const char *name); | |||
88 | ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf, | 88 | ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf, |
89 | size_t size); | 89 | size_t size); |
90 | ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size); | 90 | ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size); |
91 | void kernfs_inode_init(void); | ||
92 | 91 | ||
93 | /* | 92 | /* |
94 | * dir.c | 93 | * dir.c |
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index f973ae9b05f1..8eaf417187f1 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c | |||
@@ -246,5 +246,4 @@ void __init kernfs_init(void) | |||
246 | kernfs_node_cache = kmem_cache_create("kernfs_node_cache", | 246 | kernfs_node_cache = kmem_cache_create("kernfs_node_cache", |
247 | sizeof(struct kernfs_node), | 247 | sizeof(struct kernfs_node), |
248 | 0, SLAB_PANIC, NULL); | 248 | 0, SLAB_PANIC, NULL); |
249 | kernfs_inode_init(); | ||
250 | } | 249 | } |
diff --git a/fs/libfs.c b/fs/libfs.c index 005843ce5dbd..b2ffdb045be4 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -948,7 +948,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end, | |||
948 | 948 | ||
949 | mutex_lock(&inode->i_mutex); | 949 | mutex_lock(&inode->i_mutex); |
950 | ret = sync_mapping_buffers(inode->i_mapping); | 950 | ret = sync_mapping_buffers(inode->i_mapping); |
951 | if (!(inode->i_state & I_DIRTY)) | 951 | if (!(inode->i_state & I_DIRTY_ALL)) |
952 | goto out; | 952 | goto out; |
953 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | 953 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) |
954 | goto out; | 954 | goto out; |
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 1cc6ec51e6b1..47a32b6d9b90 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
@@ -65,7 +65,7 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) | |||
65 | return (struct sockaddr *)&nsm->sm_addr; | 65 | return (struct sockaddr *)&nsm->sm_addr; |
66 | } | 66 | } |
67 | 67 | ||
68 | static struct rpc_clnt *nsm_create(struct net *net) | 68 | static struct rpc_clnt *nsm_create(struct net *net, const char *nodename) |
69 | { | 69 | { |
70 | struct sockaddr_in sin = { | 70 | struct sockaddr_in sin = { |
71 | .sin_family = AF_INET, | 71 | .sin_family = AF_INET, |
@@ -77,6 +77,7 @@ static struct rpc_clnt *nsm_create(struct net *net) | |||
77 | .address = (struct sockaddr *)&sin, | 77 | .address = (struct sockaddr *)&sin, |
78 | .addrsize = sizeof(sin), | 78 | .addrsize = sizeof(sin), |
79 | .servername = "rpc.statd", | 79 | .servername = "rpc.statd", |
80 | .nodename = nodename, | ||
80 | .program = &nsm_program, | 81 | .program = &nsm_program, |
81 | .version = NSM_VERSION, | 82 | .version = NSM_VERSION, |
82 | .authflavor = RPC_AUTH_NULL, | 83 | .authflavor = RPC_AUTH_NULL, |
@@ -102,7 +103,7 @@ out: | |||
102 | return clnt; | 103 | return clnt; |
103 | } | 104 | } |
104 | 105 | ||
105 | static struct rpc_clnt *nsm_client_get(struct net *net) | 106 | static struct rpc_clnt *nsm_client_get(struct net *net, const char *nodename) |
106 | { | 107 | { |
107 | struct rpc_clnt *clnt, *new; | 108 | struct rpc_clnt *clnt, *new; |
108 | struct lockd_net *ln = net_generic(net, lockd_net_id); | 109 | struct lockd_net *ln = net_generic(net, lockd_net_id); |
@@ -111,7 +112,7 @@ static struct rpc_clnt *nsm_client_get(struct net *net) | |||
111 | if (clnt != NULL) | 112 | if (clnt != NULL) |
112 | goto out; | 113 | goto out; |
113 | 114 | ||
114 | clnt = new = nsm_create(net); | 115 | clnt = new = nsm_create(net, nodename); |
115 | if (IS_ERR(clnt)) | 116 | if (IS_ERR(clnt)) |
116 | goto out; | 117 | goto out; |
117 | 118 | ||
@@ -190,19 +191,23 @@ int nsm_monitor(const struct nlm_host *host) | |||
190 | struct nsm_res res; | 191 | struct nsm_res res; |
191 | int status; | 192 | int status; |
192 | struct rpc_clnt *clnt; | 193 | struct rpc_clnt *clnt; |
194 | const char *nodename = NULL; | ||
193 | 195 | ||
194 | dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name); | 196 | dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name); |
195 | 197 | ||
196 | if (nsm->sm_monitored) | 198 | if (nsm->sm_monitored) |
197 | return 0; | 199 | return 0; |
198 | 200 | ||
201 | if (host->h_rpcclnt) | ||
202 | nodename = host->h_rpcclnt->cl_nodename; | ||
203 | |||
199 | /* | 204 | /* |
200 | * Choose whether to record the caller_name or IP address of | 205 | * Choose whether to record the caller_name or IP address of |
201 | * this peer in the local rpc.statd's database. | 206 | * this peer in the local rpc.statd's database. |
202 | */ | 207 | */ |
203 | nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; | 208 | nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; |
204 | 209 | ||
205 | clnt = nsm_client_get(host->net); | 210 | clnt = nsm_client_get(host->net, nodename); |
206 | if (IS_ERR(clnt)) { | 211 | if (IS_ERR(clnt)) { |
207 | status = PTR_ERR(clnt); | 212 | status = PTR_ERR(clnt); |
208 | dprintk("lockd: failed to create NSM upcall transport, " | 213 | dprintk("lockd: failed to create NSM upcall transport, " |
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 56598742dde4..5581e020644b 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -57,8 +57,8 @@ static DEFINE_SPINLOCK(nlm_blocked_lock); | |||
57 | static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) | 57 | static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) |
58 | { | 58 | { |
59 | /* | 59 | /* |
60 | * We can get away with a static buffer because we're only | 60 | * We can get away with a static buffer because this is only called |
61 | * called with BKL held. | 61 | * from lockd, which is single-threaded. |
62 | */ | 62 | */ |
63 | static char buf[2*NLM_MAXCOOKIELEN+1]; | 63 | static char buf[2*NLM_MAXCOOKIELEN+1]; |
64 | unsigned int i, len = sizeof(buf); | 64 | unsigned int i, len = sizeof(buf); |
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index d12ff4e2dbe7..665ef5a05183 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c | |||
@@ -164,12 +164,15 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, | |||
164 | { | 164 | { |
165 | struct inode *inode = nlmsvc_file_inode(file); | 165 | struct inode *inode = nlmsvc_file_inode(file); |
166 | struct file_lock *fl; | 166 | struct file_lock *fl; |
167 | struct file_lock_context *flctx = inode->i_flctx; | ||
167 | struct nlm_host *lockhost; | 168 | struct nlm_host *lockhost; |
168 | 169 | ||
170 | if (!flctx || list_empty_careful(&flctx->flc_posix)) | ||
171 | return 0; | ||
169 | again: | 172 | again: |
170 | file->f_locks = 0; | 173 | file->f_locks = 0; |
171 | spin_lock(&inode->i_lock); | 174 | spin_lock(&flctx->flc_lock); |
172 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { | 175 | list_for_each_entry(fl, &flctx->flc_posix, fl_list) { |
173 | if (fl->fl_lmops != &nlmsvc_lock_operations) | 176 | if (fl->fl_lmops != &nlmsvc_lock_operations) |
174 | continue; | 177 | continue; |
175 | 178 | ||
@@ -180,7 +183,7 @@ again: | |||
180 | if (match(lockhost, host)) { | 183 | if (match(lockhost, host)) { |
181 | struct file_lock lock = *fl; | 184 | struct file_lock lock = *fl; |
182 | 185 | ||
183 | spin_unlock(&inode->i_lock); | 186 | spin_unlock(&flctx->flc_lock); |
184 | lock.fl_type = F_UNLCK; | 187 | lock.fl_type = F_UNLCK; |
185 | lock.fl_start = 0; | 188 | lock.fl_start = 0; |
186 | lock.fl_end = OFFSET_MAX; | 189 | lock.fl_end = OFFSET_MAX; |
@@ -192,7 +195,7 @@ again: | |||
192 | goto again; | 195 | goto again; |
193 | } | 196 | } |
194 | } | 197 | } |
195 | spin_unlock(&inode->i_lock); | 198 | spin_unlock(&flctx->flc_lock); |
196 | 199 | ||
197 | return 0; | 200 | return 0; |
198 | } | 201 | } |
@@ -223,18 +226,21 @@ nlm_file_inuse(struct nlm_file *file) | |||
223 | { | 226 | { |
224 | struct inode *inode = nlmsvc_file_inode(file); | 227 | struct inode *inode = nlmsvc_file_inode(file); |
225 | struct file_lock *fl; | 228 | struct file_lock *fl; |
229 | struct file_lock_context *flctx = inode->i_flctx; | ||
226 | 230 | ||
227 | if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) | 231 | if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) |
228 | return 1; | 232 | return 1; |
229 | 233 | ||
230 | spin_lock(&inode->i_lock); | 234 | if (flctx && !list_empty_careful(&flctx->flc_posix)) { |
231 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { | 235 | spin_lock(&flctx->flc_lock); |
232 | if (fl->fl_lmops == &nlmsvc_lock_operations) { | 236 | list_for_each_entry(fl, &flctx->flc_posix, fl_list) { |
233 | spin_unlock(&inode->i_lock); | 237 | if (fl->fl_lmops == &nlmsvc_lock_operations) { |
234 | return 1; | 238 | spin_unlock(&flctx->flc_lock); |
239 | return 1; | ||
240 | } | ||
235 | } | 241 | } |
242 | spin_unlock(&flctx->flc_lock); | ||
236 | } | 243 | } |
237 | spin_unlock(&inode->i_lock); | ||
238 | file->f_locks = 0; | 244 | file->f_locks = 0; |
239 | return 0; | 245 | return 0; |
240 | } | 246 | } |
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 9340e7e10ef6..5b651daad518 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c | |||
@@ -95,14 +95,6 @@ nlm_decode_fh(__be32 *p, struct nfs_fh *f) | |||
95 | return p + XDR_QUADLEN(NFS2_FHSIZE); | 95 | return p + XDR_QUADLEN(NFS2_FHSIZE); |
96 | } | 96 | } |
97 | 97 | ||
98 | static inline __be32 * | ||
99 | nlm_encode_fh(__be32 *p, struct nfs_fh *f) | ||
100 | { | ||
101 | *p++ = htonl(NFS2_FHSIZE); | ||
102 | memcpy(p, f->data, NFS2_FHSIZE); | ||
103 | return p + XDR_QUADLEN(NFS2_FHSIZE); | ||
104 | } | ||
105 | |||
106 | /* | 98 | /* |
107 | * Encode and decode owner handle | 99 | * Encode and decode owner handle |
108 | */ | 100 | */ |
diff --git a/fs/locks.c b/fs/locks.c index 59e2f905e4ff..365c82e1b3a9 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -137,7 +137,7 @@ | |||
137 | 137 | ||
138 | #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) | 138 | #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) |
139 | #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) | 139 | #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) |
140 | #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) | 140 | #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT)) |
141 | #define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) | 141 | #define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) |
142 | 142 | ||
143 | static bool lease_breaking(struct file_lock *fl) | 143 | static bool lease_breaking(struct file_lock *fl) |
@@ -157,14 +157,11 @@ static int target_leasetype(struct file_lock *fl) | |||
157 | int leases_enable = 1; | 157 | int leases_enable = 1; |
158 | int lease_break_time = 45; | 158 | int lease_break_time = 45; |
159 | 159 | ||
160 | #define for_each_lock(inode, lockp) \ | ||
161 | for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) | ||
162 | |||
163 | /* | 160 | /* |
164 | * The global file_lock_list is only used for displaying /proc/locks, so we | 161 | * The global file_lock_list is only used for displaying /proc/locks, so we |
165 | * keep a list on each CPU, with each list protected by its own spinlock via | 162 | * keep a list on each CPU, with each list protected by its own spinlock via |
166 | * the file_lock_lglock. Note that alterations to the list also require that | 163 | * the file_lock_lglock. Note that alterations to the list also require that |
167 | * the relevant i_lock is held. | 164 | * the relevant flc_lock is held. |
168 | */ | 165 | */ |
169 | DEFINE_STATIC_LGLOCK(file_lock_lglock); | 166 | DEFINE_STATIC_LGLOCK(file_lock_lglock); |
170 | static DEFINE_PER_CPU(struct hlist_head, file_lock_list); | 167 | static DEFINE_PER_CPU(struct hlist_head, file_lock_list); |
@@ -192,21 +189,68 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); | |||
192 | * contrast to those that are acting as records of acquired locks). | 189 | * contrast to those that are acting as records of acquired locks). |
193 | * | 190 | * |
194 | * Note that when we acquire this lock in order to change the above fields, | 191 | * Note that when we acquire this lock in order to change the above fields, |
195 | * we often hold the i_lock as well. In certain cases, when reading the fields | 192 | * we often hold the flc_lock as well. In certain cases, when reading the fields |
196 | * protected by this lock, we can skip acquiring it iff we already hold the | 193 | * protected by this lock, we can skip acquiring it iff we already hold the |
197 | * i_lock. | 194 | * flc_lock. |
198 | * | 195 | * |
199 | * In particular, adding an entry to the fl_block list requires that you hold | 196 | * In particular, adding an entry to the fl_block list requires that you hold |
200 | * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting | 197 | * both the flc_lock and the blocked_lock_lock (acquired in that order). |
201 | * an entry from the list however only requires the file_lock_lock. | 198 | * Deleting an entry from the list however only requires the file_lock_lock. |
202 | */ | 199 | */ |
203 | static DEFINE_SPINLOCK(blocked_lock_lock); | 200 | static DEFINE_SPINLOCK(blocked_lock_lock); |
204 | 201 | ||
202 | static struct kmem_cache *flctx_cache __read_mostly; | ||
205 | static struct kmem_cache *filelock_cache __read_mostly; | 203 | static struct kmem_cache *filelock_cache __read_mostly; |
206 | 204 | ||
205 | static struct file_lock_context * | ||
206 | locks_get_lock_context(struct inode *inode) | ||
207 | { | ||
208 | struct file_lock_context *new; | ||
209 | |||
210 | if (likely(inode->i_flctx)) | ||
211 | goto out; | ||
212 | |||
213 | new = kmem_cache_alloc(flctx_cache, GFP_KERNEL); | ||
214 | if (!new) | ||
215 | goto out; | ||
216 | |||
217 | spin_lock_init(&new->flc_lock); | ||
218 | INIT_LIST_HEAD(&new->flc_flock); | ||
219 | INIT_LIST_HEAD(&new->flc_posix); | ||
220 | INIT_LIST_HEAD(&new->flc_lease); | ||
221 | |||
222 | /* | ||
223 | * Assign the pointer if it's not already assigned. If it is, then | ||
224 | * free the context we just allocated. | ||
225 | */ | ||
226 | spin_lock(&inode->i_lock); | ||
227 | if (likely(!inode->i_flctx)) { | ||
228 | inode->i_flctx = new; | ||
229 | new = NULL; | ||
230 | } | ||
231 | spin_unlock(&inode->i_lock); | ||
232 | |||
233 | if (new) | ||
234 | kmem_cache_free(flctx_cache, new); | ||
235 | out: | ||
236 | return inode->i_flctx; | ||
237 | } | ||
238 | |||
239 | void | ||
240 | locks_free_lock_context(struct file_lock_context *ctx) | ||
241 | { | ||
242 | if (ctx) { | ||
243 | WARN_ON_ONCE(!list_empty(&ctx->flc_flock)); | ||
244 | WARN_ON_ONCE(!list_empty(&ctx->flc_posix)); | ||
245 | WARN_ON_ONCE(!list_empty(&ctx->flc_lease)); | ||
246 | kmem_cache_free(flctx_cache, ctx); | ||
247 | } | ||
248 | } | ||
249 | |||
207 | static void locks_init_lock_heads(struct file_lock *fl) | 250 | static void locks_init_lock_heads(struct file_lock *fl) |
208 | { | 251 | { |
209 | INIT_HLIST_NODE(&fl->fl_link); | 252 | INIT_HLIST_NODE(&fl->fl_link); |
253 | INIT_LIST_HEAD(&fl->fl_list); | ||
210 | INIT_LIST_HEAD(&fl->fl_block); | 254 | INIT_LIST_HEAD(&fl->fl_block); |
211 | init_waitqueue_head(&fl->fl_wait); | 255 | init_waitqueue_head(&fl->fl_wait); |
212 | } | 256 | } |
@@ -243,6 +287,7 @@ EXPORT_SYMBOL_GPL(locks_release_private); | |||
243 | void locks_free_lock(struct file_lock *fl) | 287 | void locks_free_lock(struct file_lock *fl) |
244 | { | 288 | { |
245 | BUG_ON(waitqueue_active(&fl->fl_wait)); | 289 | BUG_ON(waitqueue_active(&fl->fl_wait)); |
290 | BUG_ON(!list_empty(&fl->fl_list)); | ||
246 | BUG_ON(!list_empty(&fl->fl_block)); | 291 | BUG_ON(!list_empty(&fl->fl_block)); |
247 | BUG_ON(!hlist_unhashed(&fl->fl_link)); | 292 | BUG_ON(!hlist_unhashed(&fl->fl_link)); |
248 | 293 | ||
@@ -257,8 +302,8 @@ locks_dispose_list(struct list_head *dispose) | |||
257 | struct file_lock *fl; | 302 | struct file_lock *fl; |
258 | 303 | ||
259 | while (!list_empty(dispose)) { | 304 | while (!list_empty(dispose)) { |
260 | fl = list_first_entry(dispose, struct file_lock, fl_block); | 305 | fl = list_first_entry(dispose, struct file_lock, fl_list); |
261 | list_del_init(&fl->fl_block); | 306 | list_del_init(&fl->fl_list); |
262 | locks_free_lock(fl); | 307 | locks_free_lock(fl); |
263 | } | 308 | } |
264 | } | 309 | } |
@@ -513,7 +558,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) | |||
513 | return fl1->fl_owner == fl2->fl_owner; | 558 | return fl1->fl_owner == fl2->fl_owner; |
514 | } | 559 | } |
515 | 560 | ||
516 | /* Must be called with the i_lock held! */ | 561 | /* Must be called with the flc_lock held! */ |
517 | static void locks_insert_global_locks(struct file_lock *fl) | 562 | static void locks_insert_global_locks(struct file_lock *fl) |
518 | { | 563 | { |
519 | lg_local_lock(&file_lock_lglock); | 564 | lg_local_lock(&file_lock_lglock); |
@@ -522,12 +567,12 @@ static void locks_insert_global_locks(struct file_lock *fl) | |||
522 | lg_local_unlock(&file_lock_lglock); | 567 | lg_local_unlock(&file_lock_lglock); |
523 | } | 568 | } |
524 | 569 | ||
525 | /* Must be called with the i_lock held! */ | 570 | /* Must be called with the flc_lock held! */ |
526 | static void locks_delete_global_locks(struct file_lock *fl) | 571 | static void locks_delete_global_locks(struct file_lock *fl) |
527 | { | 572 | { |
528 | /* | 573 | /* |
529 | * Avoid taking lock if already unhashed. This is safe since this check | 574 | * Avoid taking lock if already unhashed. This is safe since this check |
530 | * is done while holding the i_lock, and new insertions into the list | 575 | * is done while holding the flc_lock, and new insertions into the list |
531 | * also require that it be held. | 576 | * also require that it be held. |
532 | */ | 577 | */ |
533 | if (hlist_unhashed(&fl->fl_link)) | 578 | if (hlist_unhashed(&fl->fl_link)) |
@@ -579,10 +624,10 @@ static void locks_delete_block(struct file_lock *waiter) | |||
579 | * the order they blocked. The documentation doesn't require this but | 624 | * the order they blocked. The documentation doesn't require this but |
580 | * it seems like the reasonable thing to do. | 625 | * it seems like the reasonable thing to do. |
581 | * | 626 | * |
582 | * Must be called with both the i_lock and blocked_lock_lock held. The fl_block | 627 | * Must be called with both the flc_lock and blocked_lock_lock held. The |
583 | * list itself is protected by the blocked_lock_lock, but by ensuring that the | 628 | * fl_block list itself is protected by the blocked_lock_lock, but by ensuring |
584 | * i_lock is also held on insertions we can avoid taking the blocked_lock_lock | 629 | * that the flc_lock is also held on insertions we can avoid taking the |
585 | * in some cases when we see that the fl_block list is empty. | 630 | * blocked_lock_lock in some cases when we see that the fl_block list is empty. |
586 | */ | 631 | */ |
587 | static void __locks_insert_block(struct file_lock *blocker, | 632 | static void __locks_insert_block(struct file_lock *blocker, |
588 | struct file_lock *waiter) | 633 | struct file_lock *waiter) |
@@ -594,7 +639,7 @@ static void __locks_insert_block(struct file_lock *blocker, | |||
594 | locks_insert_global_blocked(waiter); | 639 | locks_insert_global_blocked(waiter); |
595 | } | 640 | } |
596 | 641 | ||
597 | /* Must be called with i_lock held. */ | 642 | /* Must be called with flc_lock held. */ |
598 | static void locks_insert_block(struct file_lock *blocker, | 643 | static void locks_insert_block(struct file_lock *blocker, |
599 | struct file_lock *waiter) | 644 | struct file_lock *waiter) |
600 | { | 645 | { |
@@ -606,15 +651,15 @@ static void locks_insert_block(struct file_lock *blocker, | |||
606 | /* | 651 | /* |
607 | * Wake up processes blocked waiting for blocker. | 652 | * Wake up processes blocked waiting for blocker. |
608 | * | 653 | * |
609 | * Must be called with the inode->i_lock held! | 654 | * Must be called with the inode->flc_lock held! |
610 | */ | 655 | */ |
611 | static void locks_wake_up_blocks(struct file_lock *blocker) | 656 | static void locks_wake_up_blocks(struct file_lock *blocker) |
612 | { | 657 | { |
613 | /* | 658 | /* |
614 | * Avoid taking global lock if list is empty. This is safe since new | 659 | * Avoid taking global lock if list is empty. This is safe since new |
615 | * blocked requests are only added to the list under the i_lock, and | 660 | * blocked requests are only added to the list under the flc_lock, and |
616 | * the i_lock is always held here. Note that removal from the fl_block | 661 | * the flc_lock is always held here. Note that removal from the fl_block |
617 | * list does not require the i_lock, so we must recheck list_empty() | 662 | * list does not require the flc_lock, so we must recheck list_empty() |
618 | * after acquiring the blocked_lock_lock. | 663 | * after acquiring the blocked_lock_lock. |
619 | */ | 664 | */ |
620 | if (list_empty(&blocker->fl_block)) | 665 | if (list_empty(&blocker->fl_block)) |
@@ -635,63 +680,32 @@ static void locks_wake_up_blocks(struct file_lock *blocker) | |||
635 | spin_unlock(&blocked_lock_lock); | 680 | spin_unlock(&blocked_lock_lock); |
636 | } | 681 | } |
637 | 682 | ||
638 | /* Insert file lock fl into an inode's lock list at the position indicated | 683 | static void |
639 | * by pos. At the same time add the lock to the global file lock list. | 684 | locks_insert_lock_ctx(struct file_lock *fl, struct list_head *before) |
640 | * | ||
641 | * Must be called with the i_lock held! | ||
642 | */ | ||
643 | static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) | ||
644 | { | 685 | { |
645 | fl->fl_nspid = get_pid(task_tgid(current)); | 686 | fl->fl_nspid = get_pid(task_tgid(current)); |
646 | 687 | list_add_tail(&fl->fl_list, before); | |
647 | /* insert into file's list */ | ||
648 | fl->fl_next = *pos; | ||
649 | *pos = fl; | ||
650 | |||
651 | locks_insert_global_locks(fl); | 688 | locks_insert_global_locks(fl); |
652 | } | 689 | } |
653 | 690 | ||
654 | /** | 691 | static void |
655 | * locks_delete_lock - Delete a lock and then free it. | 692 | locks_unlink_lock_ctx(struct file_lock *fl) |
656 | * @thisfl_p: pointer that points to the fl_next field of the previous | ||
657 | * inode->i_flock list entry | ||
658 | * | ||
659 | * Unlink a lock from all lists and free the namespace reference, but don't | ||
660 | * free it yet. Wake up processes that are blocked waiting for this lock and | ||
661 | * notify the FS that the lock has been cleared. | ||
662 | * | ||
663 | * Must be called with the i_lock held! | ||
664 | */ | ||
665 | static void locks_unlink_lock(struct file_lock **thisfl_p) | ||
666 | { | 693 | { |
667 | struct file_lock *fl = *thisfl_p; | ||
668 | |||
669 | locks_delete_global_locks(fl); | 694 | locks_delete_global_locks(fl); |
670 | 695 | list_del_init(&fl->fl_list); | |
671 | *thisfl_p = fl->fl_next; | ||
672 | fl->fl_next = NULL; | ||
673 | |||
674 | if (fl->fl_nspid) { | 696 | if (fl->fl_nspid) { |
675 | put_pid(fl->fl_nspid); | 697 | put_pid(fl->fl_nspid); |
676 | fl->fl_nspid = NULL; | 698 | fl->fl_nspid = NULL; |
677 | } | 699 | } |
678 | |||
679 | locks_wake_up_blocks(fl); | 700 | locks_wake_up_blocks(fl); |
680 | } | 701 | } |
681 | 702 | ||
682 | /* | 703 | static void |
683 | * Unlink a lock from all lists and free it. | 704 | locks_delete_lock_ctx(struct file_lock *fl, struct list_head *dispose) |
684 | * | ||
685 | * Must be called with i_lock held! | ||
686 | */ | ||
687 | static void locks_delete_lock(struct file_lock **thisfl_p, | ||
688 | struct list_head *dispose) | ||
689 | { | 705 | { |
690 | struct file_lock *fl = *thisfl_p; | 706 | locks_unlink_lock_ctx(fl); |
691 | |||
692 | locks_unlink_lock(thisfl_p); | ||
693 | if (dispose) | 707 | if (dispose) |
694 | list_add(&fl->fl_block, dispose); | 708 | list_add(&fl->fl_list, dispose); |
695 | else | 709 | else |
696 | locks_free_lock(fl); | 710 | locks_free_lock(fl); |
697 | } | 711 | } |
@@ -746,22 +760,27 @@ void | |||
746 | posix_test_lock(struct file *filp, struct file_lock *fl) | 760 | posix_test_lock(struct file *filp, struct file_lock *fl) |
747 | { | 761 | { |
748 | struct file_lock *cfl; | 762 | struct file_lock *cfl; |
763 | struct file_lock_context *ctx; | ||
749 | struct inode *inode = file_inode(filp); | 764 | struct inode *inode = file_inode(filp); |
750 | 765 | ||
751 | spin_lock(&inode->i_lock); | 766 | ctx = inode->i_flctx; |
752 | for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { | 767 | if (!ctx || list_empty_careful(&ctx->flc_posix)) { |
753 | if (!IS_POSIX(cfl)) | ||
754 | continue; | ||
755 | if (posix_locks_conflict(fl, cfl)) | ||
756 | break; | ||
757 | } | ||
758 | if (cfl) { | ||
759 | locks_copy_conflock(fl, cfl); | ||
760 | if (cfl->fl_nspid) | ||
761 | fl->fl_pid = pid_vnr(cfl->fl_nspid); | ||
762 | } else | ||
763 | fl->fl_type = F_UNLCK; | 768 | fl->fl_type = F_UNLCK; |
764 | spin_unlock(&inode->i_lock); | 769 | return; |
770 | } | ||
771 | |||
772 | spin_lock(&ctx->flc_lock); | ||
773 | list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { | ||
774 | if (posix_locks_conflict(fl, cfl)) { | ||
775 | locks_copy_conflock(fl, cfl); | ||
776 | if (cfl->fl_nspid) | ||
777 | fl->fl_pid = pid_vnr(cfl->fl_nspid); | ||
778 | goto out; | ||
779 | } | ||
780 | } | ||
781 | fl->fl_type = F_UNLCK; | ||
782 | out: | ||
783 | spin_unlock(&ctx->flc_lock); | ||
765 | return; | 784 | return; |
766 | } | 785 | } |
767 | EXPORT_SYMBOL(posix_test_lock); | 786 | EXPORT_SYMBOL(posix_test_lock); |
@@ -845,34 +864,34 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, | |||
845 | static int flock_lock_file(struct file *filp, struct file_lock *request) | 864 | static int flock_lock_file(struct file *filp, struct file_lock *request) |
846 | { | 865 | { |
847 | struct file_lock *new_fl = NULL; | 866 | struct file_lock *new_fl = NULL; |
848 | struct file_lock **before; | 867 | struct file_lock *fl; |
849 | struct inode * inode = file_inode(filp); | 868 | struct file_lock_context *ctx; |
869 | struct inode *inode = file_inode(filp); | ||
850 | int error = 0; | 870 | int error = 0; |
851 | int found = 0; | 871 | bool found = false; |
852 | LIST_HEAD(dispose); | 872 | LIST_HEAD(dispose); |
853 | 873 | ||
874 | ctx = locks_get_lock_context(inode); | ||
875 | if (!ctx) | ||
876 | return -ENOMEM; | ||
877 | |||
854 | if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { | 878 | if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { |
855 | new_fl = locks_alloc_lock(); | 879 | new_fl = locks_alloc_lock(); |
856 | if (!new_fl) | 880 | if (!new_fl) |
857 | return -ENOMEM; | 881 | return -ENOMEM; |
858 | } | 882 | } |
859 | 883 | ||
860 | spin_lock(&inode->i_lock); | 884 | spin_lock(&ctx->flc_lock); |
861 | if (request->fl_flags & FL_ACCESS) | 885 | if (request->fl_flags & FL_ACCESS) |
862 | goto find_conflict; | 886 | goto find_conflict; |
863 | 887 | ||
864 | for_each_lock(inode, before) { | 888 | list_for_each_entry(fl, &ctx->flc_flock, fl_list) { |
865 | struct file_lock *fl = *before; | ||
866 | if (IS_POSIX(fl)) | ||
867 | break; | ||
868 | if (IS_LEASE(fl)) | ||
869 | continue; | ||
870 | if (filp != fl->fl_file) | 889 | if (filp != fl->fl_file) |
871 | continue; | 890 | continue; |
872 | if (request->fl_type == fl->fl_type) | 891 | if (request->fl_type == fl->fl_type) |
873 | goto out; | 892 | goto out; |
874 | found = 1; | 893 | found = true; |
875 | locks_delete_lock(before, &dispose); | 894 | locks_delete_lock_ctx(fl, &dispose); |
876 | break; | 895 | break; |
877 | } | 896 | } |
878 | 897 | ||
@@ -882,23 +901,8 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) | |||
882 | goto out; | 901 | goto out; |
883 | } | 902 | } |
884 | 903 | ||
885 | /* | ||
886 | * If a higher-priority process was blocked on the old file lock, | ||
887 | * give it the opportunity to lock the file. | ||
888 | */ | ||
889 | if (found) { | ||
890 | spin_unlock(&inode->i_lock); | ||
891 | cond_resched(); | ||
892 | spin_lock(&inode->i_lock); | ||
893 | } | ||
894 | |||
895 | find_conflict: | 904 | find_conflict: |
896 | for_each_lock(inode, before) { | 905 | list_for_each_entry(fl, &ctx->flc_flock, fl_list) { |
897 | struct file_lock *fl = *before; | ||
898 | if (IS_POSIX(fl)) | ||
899 | break; | ||
900 | if (IS_LEASE(fl)) | ||
901 | continue; | ||
902 | if (!flock_locks_conflict(request, fl)) | 906 | if (!flock_locks_conflict(request, fl)) |
903 | continue; | 907 | continue; |
904 | error = -EAGAIN; | 908 | error = -EAGAIN; |
@@ -911,12 +915,12 @@ find_conflict: | |||
911 | if (request->fl_flags & FL_ACCESS) | 915 | if (request->fl_flags & FL_ACCESS) |
912 | goto out; | 916 | goto out; |
913 | locks_copy_lock(new_fl, request); | 917 | locks_copy_lock(new_fl, request); |
914 | locks_insert_lock(before, new_fl); | 918 | locks_insert_lock_ctx(new_fl, &ctx->flc_flock); |
915 | new_fl = NULL; | 919 | new_fl = NULL; |
916 | error = 0; | 920 | error = 0; |
917 | 921 | ||
918 | out: | 922 | out: |
919 | spin_unlock(&inode->i_lock); | 923 | spin_unlock(&ctx->flc_lock); |
920 | if (new_fl) | 924 | if (new_fl) |
921 | locks_free_lock(new_fl); | 925 | locks_free_lock(new_fl); |
922 | locks_dispose_list(&dispose); | 926 | locks_dispose_list(&dispose); |
@@ -925,16 +929,20 @@ out: | |||
925 | 929 | ||
926 | static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) | 930 | static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) |
927 | { | 931 | { |
928 | struct file_lock *fl; | 932 | struct file_lock *fl, *tmp; |
929 | struct file_lock *new_fl = NULL; | 933 | struct file_lock *new_fl = NULL; |
930 | struct file_lock *new_fl2 = NULL; | 934 | struct file_lock *new_fl2 = NULL; |
931 | struct file_lock *left = NULL; | 935 | struct file_lock *left = NULL; |
932 | struct file_lock *right = NULL; | 936 | struct file_lock *right = NULL; |
933 | struct file_lock **before; | 937 | struct file_lock_context *ctx; |
934 | int error; | 938 | int error; |
935 | bool added = false; | 939 | bool added = false; |
936 | LIST_HEAD(dispose); | 940 | LIST_HEAD(dispose); |
937 | 941 | ||
942 | ctx = locks_get_lock_context(inode); | ||
943 | if (!ctx) | ||
944 | return -ENOMEM; | ||
945 | |||
938 | /* | 946 | /* |
939 | * We may need two file_lock structures for this operation, | 947 | * We may need two file_lock structures for this operation, |
940 | * so we get them in advance to avoid races. | 948 | * so we get them in advance to avoid races. |
@@ -948,15 +956,14 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
948 | new_fl2 = locks_alloc_lock(); | 956 | new_fl2 = locks_alloc_lock(); |
949 | } | 957 | } |
950 | 958 | ||
951 | spin_lock(&inode->i_lock); | 959 | spin_lock(&ctx->flc_lock); |
952 | /* | 960 | /* |
953 | * New lock request. Walk all POSIX locks and look for conflicts. If | 961 | * New lock request. Walk all POSIX locks and look for conflicts. If |
954 | * there are any, either return error or put the request on the | 962 | * there are any, either return error or put the request on the |
955 | * blocker's list of waiters and the global blocked_hash. | 963 | * blocker's list of waiters and the global blocked_hash. |
956 | */ | 964 | */ |
957 | if (request->fl_type != F_UNLCK) { | 965 | if (request->fl_type != F_UNLCK) { |
958 | for_each_lock(inode, before) { | 966 | list_for_each_entry(fl, &ctx->flc_posix, fl_list) { |
959 | fl = *before; | ||
960 | if (!IS_POSIX(fl)) | 967 | if (!IS_POSIX(fl)) |
961 | continue; | 968 | continue; |
962 | if (!posix_locks_conflict(request, fl)) | 969 | if (!posix_locks_conflict(request, fl)) |
@@ -986,29 +993,25 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
986 | if (request->fl_flags & FL_ACCESS) | 993 | if (request->fl_flags & FL_ACCESS) |
987 | goto out; | 994 | goto out; |
988 | 995 | ||
989 | /* | 996 | /* Find the first old lock with the same owner as the new lock */ |
990 | * Find the first old lock with the same owner as the new lock. | 997 | list_for_each_entry(fl, &ctx->flc_posix, fl_list) { |
991 | */ | 998 | if (posix_same_owner(request, fl)) |
992 | 999 | break; | |
993 | before = &inode->i_flock; | ||
994 | |||
995 | /* First skip locks owned by other processes. */ | ||
996 | while ((fl = *before) && (!IS_POSIX(fl) || | ||
997 | !posix_same_owner(request, fl))) { | ||
998 | before = &fl->fl_next; | ||
999 | } | 1000 | } |
1000 | 1001 | ||
1001 | /* Process locks with this owner. */ | 1002 | /* Process locks with this owner. */ |
1002 | while ((fl = *before) && posix_same_owner(request, fl)) { | 1003 | list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, fl_list) { |
1003 | /* Detect adjacent or overlapping regions (if same lock type) | 1004 | if (!posix_same_owner(request, fl)) |
1004 | */ | 1005 | break; |
1006 | |||
1007 | /* Detect adjacent or overlapping regions (if same lock type) */ | ||
1005 | if (request->fl_type == fl->fl_type) { | 1008 | if (request->fl_type == fl->fl_type) { |
1006 | /* In all comparisons of start vs end, use | 1009 | /* In all comparisons of start vs end, use |
1007 | * "start - 1" rather than "end + 1". If end | 1010 | * "start - 1" rather than "end + 1". If end |
1008 | * is OFFSET_MAX, end + 1 will become negative. | 1011 | * is OFFSET_MAX, end + 1 will become negative. |
1009 | */ | 1012 | */ |
1010 | if (fl->fl_end < request->fl_start - 1) | 1013 | if (fl->fl_end < request->fl_start - 1) |
1011 | goto next_lock; | 1014 | continue; |
1012 | /* If the next lock in the list has entirely bigger | 1015 | /* If the next lock in the list has entirely bigger |
1013 | * addresses than the new one, insert the lock here. | 1016 | * addresses than the new one, insert the lock here. |
1014 | */ | 1017 | */ |
@@ -1029,18 +1032,17 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
1029 | else | 1032 | else |
1030 | request->fl_end = fl->fl_end; | 1033 | request->fl_end = fl->fl_end; |
1031 | if (added) { | 1034 | if (added) { |
1032 | locks_delete_lock(before, &dispose); | 1035 | locks_delete_lock_ctx(fl, &dispose); |
1033 | continue; | 1036 | continue; |
1034 | } | 1037 | } |
1035 | request = fl; | 1038 | request = fl; |
1036 | added = true; | 1039 | added = true; |
1037 | } | 1040 | } else { |
1038 | else { | ||
1039 | /* Processing for different lock types is a bit | 1041 | /* Processing for different lock types is a bit |
1040 | * more complex. | 1042 | * more complex. |
1041 | */ | 1043 | */ |
1042 | if (fl->fl_end < request->fl_start) | 1044 | if (fl->fl_end < request->fl_start) |
1043 | goto next_lock; | 1045 | continue; |
1044 | if (fl->fl_start > request->fl_end) | 1046 | if (fl->fl_start > request->fl_end) |
1045 | break; | 1047 | break; |
1046 | if (request->fl_type == F_UNLCK) | 1048 | if (request->fl_type == F_UNLCK) |
@@ -1059,7 +1061,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
1059 | * one (This may happen several times). | 1061 | * one (This may happen several times). |
1060 | */ | 1062 | */ |
1061 | if (added) { | 1063 | if (added) { |
1062 | locks_delete_lock(before, &dispose); | 1064 | locks_delete_lock_ctx(fl, &dispose); |
1063 | continue; | 1065 | continue; |
1064 | } | 1066 | } |
1065 | /* | 1067 | /* |
@@ -1075,15 +1077,11 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
1075 | locks_copy_lock(new_fl, request); | 1077 | locks_copy_lock(new_fl, request); |
1076 | request = new_fl; | 1078 | request = new_fl; |
1077 | new_fl = NULL; | 1079 | new_fl = NULL; |
1078 | locks_delete_lock(before, &dispose); | 1080 | locks_insert_lock_ctx(request, &fl->fl_list); |
1079 | locks_insert_lock(before, request); | 1081 | locks_delete_lock_ctx(fl, &dispose); |
1080 | added = true; | 1082 | added = true; |
1081 | } | 1083 | } |
1082 | } | 1084 | } |
1083 | /* Go on to next lock. | ||
1084 | */ | ||
1085 | next_lock: | ||
1086 | before = &fl->fl_next; | ||
1087 | } | 1085 | } |
1088 | 1086 | ||
1089 | /* | 1087 | /* |
@@ -1108,7 +1106,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
1108 | goto out; | 1106 | goto out; |
1109 | } | 1107 | } |
1110 | locks_copy_lock(new_fl, request); | 1108 | locks_copy_lock(new_fl, request); |
1111 | locks_insert_lock(before, new_fl); | 1109 | locks_insert_lock_ctx(new_fl, &fl->fl_list); |
1110 | fl = new_fl; | ||
1112 | new_fl = NULL; | 1111 | new_fl = NULL; |
1113 | } | 1112 | } |
1114 | if (right) { | 1113 | if (right) { |
@@ -1119,7 +1118,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
1119 | left = new_fl2; | 1118 | left = new_fl2; |
1120 | new_fl2 = NULL; | 1119 | new_fl2 = NULL; |
1121 | locks_copy_lock(left, right); | 1120 | locks_copy_lock(left, right); |
1122 | locks_insert_lock(before, left); | 1121 | locks_insert_lock_ctx(left, &fl->fl_list); |
1123 | } | 1122 | } |
1124 | right->fl_start = request->fl_end + 1; | 1123 | right->fl_start = request->fl_end + 1; |
1125 | locks_wake_up_blocks(right); | 1124 | locks_wake_up_blocks(right); |
@@ -1129,7 +1128,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
1129 | locks_wake_up_blocks(left); | 1128 | locks_wake_up_blocks(left); |
1130 | } | 1129 | } |
1131 | out: | 1130 | out: |
1132 | spin_unlock(&inode->i_lock); | 1131 | spin_unlock(&ctx->flc_lock); |
1133 | /* | 1132 | /* |
1134 | * Free any unused locks. | 1133 | * Free any unused locks. |
1135 | */ | 1134 | */ |
@@ -1199,22 +1198,29 @@ EXPORT_SYMBOL(posix_lock_file_wait); | |||
1199 | */ | 1198 | */ |
1200 | int locks_mandatory_locked(struct file *file) | 1199 | int locks_mandatory_locked(struct file *file) |
1201 | { | 1200 | { |
1201 | int ret; | ||
1202 | struct inode *inode = file_inode(file); | 1202 | struct inode *inode = file_inode(file); |
1203 | struct file_lock_context *ctx; | ||
1203 | struct file_lock *fl; | 1204 | struct file_lock *fl; |
1204 | 1205 | ||
1206 | ctx = inode->i_flctx; | ||
1207 | if (!ctx || list_empty_careful(&ctx->flc_posix)) | ||
1208 | return 0; | ||
1209 | |||
1205 | /* | 1210 | /* |
1206 | * Search the lock list for this inode for any POSIX locks. | 1211 | * Search the lock list for this inode for any POSIX locks. |
1207 | */ | 1212 | */ |
1208 | spin_lock(&inode->i_lock); | 1213 | spin_lock(&ctx->flc_lock); |
1209 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 1214 | ret = 0; |
1210 | if (!IS_POSIX(fl)) | 1215 | list_for_each_entry(fl, &ctx->flc_posix, fl_list) { |
1211 | continue; | ||
1212 | if (fl->fl_owner != current->files && | 1216 | if (fl->fl_owner != current->files && |
1213 | fl->fl_owner != file) | 1217 | fl->fl_owner != file) { |
1218 | ret = -EAGAIN; | ||
1214 | break; | 1219 | break; |
1220 | } | ||
1215 | } | 1221 | } |
1216 | spin_unlock(&inode->i_lock); | 1222 | spin_unlock(&ctx->flc_lock); |
1217 | return fl ? -EAGAIN : 0; | 1223 | return ret; |
1218 | } | 1224 | } |
1219 | 1225 | ||
1220 | /** | 1226 | /** |
@@ -1294,9 +1300,8 @@ static void lease_clear_pending(struct file_lock *fl, int arg) | |||
1294 | } | 1300 | } |
1295 | 1301 | ||
1296 | /* We already had a lease on this file; just change its type */ | 1302 | /* We already had a lease on this file; just change its type */ |
1297 | int lease_modify(struct file_lock **before, int arg, struct list_head *dispose) | 1303 | int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose) |
1298 | { | 1304 | { |
1299 | struct file_lock *fl = *before; | ||
1300 | int error = assign_type(fl, arg); | 1305 | int error = assign_type(fl, arg); |
1301 | 1306 | ||
1302 | if (error) | 1307 | if (error) |
@@ -1313,7 +1318,7 @@ int lease_modify(struct file_lock **before, int arg, struct list_head *dispose) | |||
1313 | printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); | 1318 | printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); |
1314 | fl->fl_fasync = NULL; | 1319 | fl->fl_fasync = NULL; |
1315 | } | 1320 | } |
1316 | locks_delete_lock(before, dispose); | 1321 | locks_delete_lock_ctx(fl, dispose); |
1317 | } | 1322 | } |
1318 | return 0; | 1323 | return 0; |
1319 | } | 1324 | } |
@@ -1329,25 +1334,24 @@ static bool past_time(unsigned long then) | |||
1329 | 1334 | ||
1330 | static void time_out_leases(struct inode *inode, struct list_head *dispose) | 1335 | static void time_out_leases(struct inode *inode, struct list_head *dispose) |
1331 | { | 1336 | { |
1332 | struct file_lock **before; | 1337 | struct file_lock_context *ctx = inode->i_flctx; |
1333 | struct file_lock *fl; | 1338 | struct file_lock *fl, *tmp; |
1334 | 1339 | ||
1335 | lockdep_assert_held(&inode->i_lock); | 1340 | lockdep_assert_held(&ctx->flc_lock); |
1336 | 1341 | ||
1337 | before = &inode->i_flock; | 1342 | list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) { |
1338 | while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) { | ||
1339 | trace_time_out_leases(inode, fl); | 1343 | trace_time_out_leases(inode, fl); |
1340 | if (past_time(fl->fl_downgrade_time)) | 1344 | if (past_time(fl->fl_downgrade_time)) |
1341 | lease_modify(before, F_RDLCK, dispose); | 1345 | lease_modify(fl, F_RDLCK, dispose); |
1342 | if (past_time(fl->fl_break_time)) | 1346 | if (past_time(fl->fl_break_time)) |
1343 | lease_modify(before, F_UNLCK, dispose); | 1347 | lease_modify(fl, F_UNLCK, dispose); |
1344 | if (fl == *before) /* lease_modify may have freed fl */ | ||
1345 | before = &fl->fl_next; | ||
1346 | } | 1348 | } |
1347 | } | 1349 | } |
1348 | 1350 | ||
1349 | static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) | 1351 | static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) |
1350 | { | 1352 | { |
1353 | if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) | ||
1354 | return false; | ||
1351 | if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) | 1355 | if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) |
1352 | return false; | 1356 | return false; |
1353 | return locks_conflict(breaker, lease); | 1357 | return locks_conflict(breaker, lease); |
@@ -1356,11 +1360,12 @@ static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) | |||
1356 | static bool | 1360 | static bool |
1357 | any_leases_conflict(struct inode *inode, struct file_lock *breaker) | 1361 | any_leases_conflict(struct inode *inode, struct file_lock *breaker) |
1358 | { | 1362 | { |
1363 | struct file_lock_context *ctx = inode->i_flctx; | ||
1359 | struct file_lock *fl; | 1364 | struct file_lock *fl; |
1360 | 1365 | ||
1361 | lockdep_assert_held(&inode->i_lock); | 1366 | lockdep_assert_held(&ctx->flc_lock); |
1362 | 1367 | ||
1363 | for (fl = inode->i_flock ; fl && IS_LEASE(fl); fl = fl->fl_next) { | 1368 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { |
1364 | if (leases_conflict(fl, breaker)) | 1369 | if (leases_conflict(fl, breaker)) |
1365 | return true; | 1370 | return true; |
1366 | } | 1371 | } |
@@ -1384,7 +1389,8 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |||
1384 | { | 1389 | { |
1385 | int error = 0; | 1390 | int error = 0; |
1386 | struct file_lock *new_fl; | 1391 | struct file_lock *new_fl; |
1387 | struct file_lock *fl, **before; | 1392 | struct file_lock_context *ctx = inode->i_flctx; |
1393 | struct file_lock *fl; | ||
1388 | unsigned long break_time; | 1394 | unsigned long break_time; |
1389 | int want_write = (mode & O_ACCMODE) != O_RDONLY; | 1395 | int want_write = (mode & O_ACCMODE) != O_RDONLY; |
1390 | LIST_HEAD(dispose); | 1396 | LIST_HEAD(dispose); |
@@ -1394,7 +1400,13 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |||
1394 | return PTR_ERR(new_fl); | 1400 | return PTR_ERR(new_fl); |
1395 | new_fl->fl_flags = type; | 1401 | new_fl->fl_flags = type; |
1396 | 1402 | ||
1397 | spin_lock(&inode->i_lock); | 1403 | /* typically we will check that ctx is non-NULL before calling */ |
1404 | if (!ctx) { | ||
1405 | WARN_ON_ONCE(1); | ||
1406 | return error; | ||
1407 | } | ||
1408 | |||
1409 | spin_lock(&ctx->flc_lock); | ||
1398 | 1410 | ||
1399 | time_out_leases(inode, &dispose); | 1411 | time_out_leases(inode, &dispose); |
1400 | 1412 | ||
@@ -1408,9 +1420,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |||
1408 | break_time++; /* so that 0 means no break time */ | 1420 | break_time++; /* so that 0 means no break time */ |
1409 | } | 1421 | } |
1410 | 1422 | ||
1411 | for (before = &inode->i_flock; | 1423 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { |
1412 | ((fl = *before) != NULL) && IS_LEASE(fl); | ||
1413 | before = &fl->fl_next) { | ||
1414 | if (!leases_conflict(fl, new_fl)) | 1424 | if (!leases_conflict(fl, new_fl)) |
1415 | continue; | 1425 | continue; |
1416 | if (want_write) { | 1426 | if (want_write) { |
@@ -1419,17 +1429,16 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |||
1419 | fl->fl_flags |= FL_UNLOCK_PENDING; | 1429 | fl->fl_flags |= FL_UNLOCK_PENDING; |
1420 | fl->fl_break_time = break_time; | 1430 | fl->fl_break_time = break_time; |
1421 | } else { | 1431 | } else { |
1422 | if (lease_breaking(inode->i_flock)) | 1432 | if (lease_breaking(fl)) |
1423 | continue; | 1433 | continue; |
1424 | fl->fl_flags |= FL_DOWNGRADE_PENDING; | 1434 | fl->fl_flags |= FL_DOWNGRADE_PENDING; |
1425 | fl->fl_downgrade_time = break_time; | 1435 | fl->fl_downgrade_time = break_time; |
1426 | } | 1436 | } |
1427 | if (fl->fl_lmops->lm_break(fl)) | 1437 | if (fl->fl_lmops->lm_break(fl)) |
1428 | locks_delete_lock(before, &dispose); | 1438 | locks_delete_lock_ctx(fl, &dispose); |
1429 | } | 1439 | } |
1430 | 1440 | ||
1431 | fl = inode->i_flock; | 1441 | if (list_empty(&ctx->flc_lease)) |
1432 | if (!fl || !IS_LEASE(fl)) | ||
1433 | goto out; | 1442 | goto out; |
1434 | 1443 | ||
1435 | if (mode & O_NONBLOCK) { | 1444 | if (mode & O_NONBLOCK) { |
@@ -1439,18 +1448,19 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |||
1439 | } | 1448 | } |
1440 | 1449 | ||
1441 | restart: | 1450 | restart: |
1442 | break_time = inode->i_flock->fl_break_time; | 1451 | fl = list_first_entry(&ctx->flc_lease, struct file_lock, fl_list); |
1452 | break_time = fl->fl_break_time; | ||
1443 | if (break_time != 0) | 1453 | if (break_time != 0) |
1444 | break_time -= jiffies; | 1454 | break_time -= jiffies; |
1445 | if (break_time == 0) | 1455 | if (break_time == 0) |
1446 | break_time++; | 1456 | break_time++; |
1447 | locks_insert_block(inode->i_flock, new_fl); | 1457 | locks_insert_block(fl, new_fl); |
1448 | trace_break_lease_block(inode, new_fl); | 1458 | trace_break_lease_block(inode, new_fl); |
1449 | spin_unlock(&inode->i_lock); | 1459 | spin_unlock(&ctx->flc_lock); |
1450 | locks_dispose_list(&dispose); | 1460 | locks_dispose_list(&dispose); |
1451 | error = wait_event_interruptible_timeout(new_fl->fl_wait, | 1461 | error = wait_event_interruptible_timeout(new_fl->fl_wait, |
1452 | !new_fl->fl_next, break_time); | 1462 | !new_fl->fl_next, break_time); |
1453 | spin_lock(&inode->i_lock); | 1463 | spin_lock(&ctx->flc_lock); |
1454 | trace_break_lease_unblock(inode, new_fl); | 1464 | trace_break_lease_unblock(inode, new_fl); |
1455 | locks_delete_block(new_fl); | 1465 | locks_delete_block(new_fl); |
1456 | if (error >= 0) { | 1466 | if (error >= 0) { |
@@ -1462,12 +1472,10 @@ restart: | |||
1462 | time_out_leases(inode, &dispose); | 1472 | time_out_leases(inode, &dispose); |
1463 | if (any_leases_conflict(inode, new_fl)) | 1473 | if (any_leases_conflict(inode, new_fl)) |
1464 | goto restart; | 1474 | goto restart; |
1465 | |||
1466 | error = 0; | 1475 | error = 0; |
1467 | } | 1476 | } |
1468 | |||
1469 | out: | 1477 | out: |
1470 | spin_unlock(&inode->i_lock); | 1478 | spin_unlock(&ctx->flc_lock); |
1471 | locks_dispose_list(&dispose); | 1479 | locks_dispose_list(&dispose); |
1472 | locks_free_lock(new_fl); | 1480 | locks_free_lock(new_fl); |
1473 | return error; | 1481 | return error; |
@@ -1487,14 +1495,18 @@ EXPORT_SYMBOL(__break_lease); | |||
1487 | void lease_get_mtime(struct inode *inode, struct timespec *time) | 1495 | void lease_get_mtime(struct inode *inode, struct timespec *time) |
1488 | { | 1496 | { |
1489 | bool has_lease = false; | 1497 | bool has_lease = false; |
1490 | struct file_lock *flock; | 1498 | struct file_lock_context *ctx = inode->i_flctx; |
1499 | struct file_lock *fl; | ||
1491 | 1500 | ||
1492 | if (inode->i_flock) { | 1501 | if (ctx && !list_empty_careful(&ctx->flc_lease)) { |
1493 | spin_lock(&inode->i_lock); | 1502 | spin_lock(&ctx->flc_lock); |
1494 | flock = inode->i_flock; | 1503 | if (!list_empty(&ctx->flc_lease)) { |
1495 | if (flock && IS_LEASE(flock) && (flock->fl_type == F_WRLCK)) | 1504 | fl = list_first_entry(&ctx->flc_lease, |
1496 | has_lease = true; | 1505 | struct file_lock, fl_list); |
1497 | spin_unlock(&inode->i_lock); | 1506 | if (fl->fl_type == F_WRLCK) |
1507 | has_lease = true; | ||
1508 | } | ||
1509 | spin_unlock(&ctx->flc_lock); | ||
1498 | } | 1510 | } |
1499 | 1511 | ||
1500 | if (has_lease) | 1512 | if (has_lease) |
@@ -1532,20 +1544,22 @@ int fcntl_getlease(struct file *filp) | |||
1532 | { | 1544 | { |
1533 | struct file_lock *fl; | 1545 | struct file_lock *fl; |
1534 | struct inode *inode = file_inode(filp); | 1546 | struct inode *inode = file_inode(filp); |
1547 | struct file_lock_context *ctx = inode->i_flctx; | ||
1535 | int type = F_UNLCK; | 1548 | int type = F_UNLCK; |
1536 | LIST_HEAD(dispose); | 1549 | LIST_HEAD(dispose); |
1537 | 1550 | ||
1538 | spin_lock(&inode->i_lock); | 1551 | if (ctx && !list_empty_careful(&ctx->flc_lease)) { |
1539 | time_out_leases(file_inode(filp), &dispose); | 1552 | spin_lock(&ctx->flc_lock); |
1540 | for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); | 1553 | time_out_leases(file_inode(filp), &dispose); |
1541 | fl = fl->fl_next) { | 1554 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { |
1542 | if (fl->fl_file == filp) { | 1555 | if (fl->fl_file != filp) |
1556 | continue; | ||
1543 | type = target_leasetype(fl); | 1557 | type = target_leasetype(fl); |
1544 | break; | 1558 | break; |
1545 | } | 1559 | } |
1560 | spin_unlock(&ctx->flc_lock); | ||
1561 | locks_dispose_list(&dispose); | ||
1546 | } | 1562 | } |
1547 | spin_unlock(&inode->i_lock); | ||
1548 | locks_dispose_list(&dispose); | ||
1549 | return type; | 1563 | return type; |
1550 | } | 1564 | } |
1551 | 1565 | ||
@@ -1560,11 +1574,14 @@ int fcntl_getlease(struct file *filp) | |||
1560 | * conflict with the lease we're trying to set. | 1574 | * conflict with the lease we're trying to set. |
1561 | */ | 1575 | */ |
1562 | static int | 1576 | static int |
1563 | check_conflicting_open(const struct dentry *dentry, const long arg) | 1577 | check_conflicting_open(const struct dentry *dentry, const long arg, int flags) |
1564 | { | 1578 | { |
1565 | int ret = 0; | 1579 | int ret = 0; |
1566 | struct inode *inode = dentry->d_inode; | 1580 | struct inode *inode = dentry->d_inode; |
1567 | 1581 | ||
1582 | if (flags & FL_LAYOUT) | ||
1583 | return 0; | ||
1584 | |||
1568 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) | 1585 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) |
1569 | return -EAGAIN; | 1586 | return -EAGAIN; |
1570 | 1587 | ||
@@ -1578,9 +1595,10 @@ check_conflicting_open(const struct dentry *dentry, const long arg) | |||
1578 | static int | 1595 | static int |
1579 | generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) | 1596 | generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) |
1580 | { | 1597 | { |
1581 | struct file_lock *fl, **before, **my_before = NULL, *lease; | 1598 | struct file_lock *fl, *my_fl = NULL, *lease; |
1582 | struct dentry *dentry = filp->f_path.dentry; | 1599 | struct dentry *dentry = filp->f_path.dentry; |
1583 | struct inode *inode = dentry->d_inode; | 1600 | struct inode *inode = dentry->d_inode; |
1601 | struct file_lock_context *ctx; | ||
1584 | bool is_deleg = (*flp)->fl_flags & FL_DELEG; | 1602 | bool is_deleg = (*flp)->fl_flags & FL_DELEG; |
1585 | int error; | 1603 | int error; |
1586 | LIST_HEAD(dispose); | 1604 | LIST_HEAD(dispose); |
@@ -1588,6 +1606,10 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |||
1588 | lease = *flp; | 1606 | lease = *flp; |
1589 | trace_generic_add_lease(inode, lease); | 1607 | trace_generic_add_lease(inode, lease); |
1590 | 1608 | ||
1609 | ctx = locks_get_lock_context(inode); | ||
1610 | if (!ctx) | ||
1611 | return -ENOMEM; | ||
1612 | |||
1591 | /* | 1613 | /* |
1592 | * In the delegation case we need mutual exclusion with | 1614 | * In the delegation case we need mutual exclusion with |
1593 | * a number of operations that take the i_mutex. We trylock | 1615 | * a number of operations that take the i_mutex. We trylock |
@@ -1606,9 +1628,9 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |||
1606 | return -EINVAL; | 1628 | return -EINVAL; |
1607 | } | 1629 | } |
1608 | 1630 | ||
1609 | spin_lock(&inode->i_lock); | 1631 | spin_lock(&ctx->flc_lock); |
1610 | time_out_leases(inode, &dispose); | 1632 | time_out_leases(inode, &dispose); |
1611 | error = check_conflicting_open(dentry, arg); | 1633 | error = check_conflicting_open(dentry, arg, lease->fl_flags); |
1612 | if (error) | 1634 | if (error) |
1613 | goto out; | 1635 | goto out; |
1614 | 1636 | ||
@@ -1621,13 +1643,13 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |||
1621 | * except for this filp. | 1643 | * except for this filp. |
1622 | */ | 1644 | */ |
1623 | error = -EAGAIN; | 1645 | error = -EAGAIN; |
1624 | for (before = &inode->i_flock; | 1646 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { |
1625 | ((fl = *before) != NULL) && IS_LEASE(fl); | 1647 | if (fl->fl_file == filp && |
1626 | before = &fl->fl_next) { | 1648 | fl->fl_owner == lease->fl_owner) { |
1627 | if (fl->fl_file == filp) { | 1649 | my_fl = fl; |
1628 | my_before = before; | ||
1629 | continue; | 1650 | continue; |
1630 | } | 1651 | } |
1652 | |||
1631 | /* | 1653 | /* |
1632 | * No exclusive leases if someone else has a lease on | 1654 | * No exclusive leases if someone else has a lease on |
1633 | * this file: | 1655 | * this file: |
@@ -1642,9 +1664,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |||
1642 | goto out; | 1664 | goto out; |
1643 | } | 1665 | } |
1644 | 1666 | ||
1645 | if (my_before != NULL) { | 1667 | if (my_fl != NULL) { |
1646 | lease = *my_before; | 1668 | error = lease->fl_lmops->lm_change(my_fl, arg, &dispose); |
1647 | error = lease->fl_lmops->lm_change(my_before, arg, &dispose); | ||
1648 | if (error) | 1669 | if (error) |
1649 | goto out; | 1670 | goto out; |
1650 | goto out_setup; | 1671 | goto out_setup; |
@@ -1654,7 +1675,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |||
1654 | if (!leases_enable) | 1675 | if (!leases_enable) |
1655 | goto out; | 1676 | goto out; |
1656 | 1677 | ||
1657 | locks_insert_lock(before, lease); | 1678 | locks_insert_lock_ctx(lease, &ctx->flc_lease); |
1658 | /* | 1679 | /* |
1659 | * The check in break_lease() is lockless. It's possible for another | 1680 | * The check in break_lease() is lockless. It's possible for another |
1660 | * open to race in after we did the earlier check for a conflicting | 1681 | * open to race in after we did the earlier check for a conflicting |
@@ -1665,46 +1686,51 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |||
1665 | * precedes these checks. | 1686 | * precedes these checks. |
1666 | */ | 1687 | */ |
1667 | smp_mb(); | 1688 | smp_mb(); |
1668 | error = check_conflicting_open(dentry, arg); | 1689 | error = check_conflicting_open(dentry, arg, lease->fl_flags); |
1669 | if (error) | 1690 | if (error) { |
1670 | goto out_unlink; | 1691 | locks_unlink_lock_ctx(lease); |
1692 | goto out; | ||
1693 | } | ||
1671 | 1694 | ||
1672 | out_setup: | 1695 | out_setup: |
1673 | if (lease->fl_lmops->lm_setup) | 1696 | if (lease->fl_lmops->lm_setup) |
1674 | lease->fl_lmops->lm_setup(lease, priv); | 1697 | lease->fl_lmops->lm_setup(lease, priv); |
1675 | out: | 1698 | out: |
1676 | spin_unlock(&inode->i_lock); | 1699 | spin_unlock(&ctx->flc_lock); |
1677 | locks_dispose_list(&dispose); | 1700 | locks_dispose_list(&dispose); |
1678 | if (is_deleg) | 1701 | if (is_deleg) |
1679 | mutex_unlock(&inode->i_mutex); | 1702 | mutex_unlock(&inode->i_mutex); |
1680 | if (!error && !my_before) | 1703 | if (!error && !my_fl) |
1681 | *flp = NULL; | 1704 | *flp = NULL; |
1682 | return error; | 1705 | return error; |
1683 | out_unlink: | ||
1684 | locks_unlink_lock(before); | ||
1685 | goto out; | ||
1686 | } | 1706 | } |
1687 | 1707 | ||
1688 | static int generic_delete_lease(struct file *filp) | 1708 | static int generic_delete_lease(struct file *filp, void *owner) |
1689 | { | 1709 | { |
1690 | int error = -EAGAIN; | 1710 | int error = -EAGAIN; |
1691 | struct file_lock *fl, **before; | 1711 | struct file_lock *fl, *victim = NULL; |
1692 | struct dentry *dentry = filp->f_path.dentry; | 1712 | struct dentry *dentry = filp->f_path.dentry; |
1693 | struct inode *inode = dentry->d_inode; | 1713 | struct inode *inode = dentry->d_inode; |
1714 | struct file_lock_context *ctx = inode->i_flctx; | ||
1694 | LIST_HEAD(dispose); | 1715 | LIST_HEAD(dispose); |
1695 | 1716 | ||
1696 | spin_lock(&inode->i_lock); | 1717 | if (!ctx) { |
1697 | time_out_leases(inode, &dispose); | 1718 | trace_generic_delete_lease(inode, NULL); |
1698 | for (before = &inode->i_flock; | 1719 | return error; |
1699 | ((fl = *before) != NULL) && IS_LEASE(fl); | 1720 | } |
1700 | before = &fl->fl_next) { | 1721 | |
1701 | if (fl->fl_file == filp) | 1722 | spin_lock(&ctx->flc_lock); |
1723 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { | ||
1724 | if (fl->fl_file == filp && | ||
1725 | fl->fl_owner == owner) { | ||
1726 | victim = fl; | ||
1702 | break; | 1727 | break; |
1728 | } | ||
1703 | } | 1729 | } |
1704 | trace_generic_delete_lease(inode, fl); | 1730 | trace_generic_delete_lease(inode, fl); |
1705 | if (fl && IS_LEASE(fl)) | 1731 | if (victim) |
1706 | error = fl->fl_lmops->lm_change(before, F_UNLCK, &dispose); | 1732 | error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); |
1707 | spin_unlock(&inode->i_lock); | 1733 | spin_unlock(&ctx->flc_lock); |
1708 | locks_dispose_list(&dispose); | 1734 | locks_dispose_list(&dispose); |
1709 | return error; | 1735 | return error; |
1710 | } | 1736 | } |
@@ -1737,13 +1763,14 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp, | |||
1737 | 1763 | ||
1738 | switch (arg) { | 1764 | switch (arg) { |
1739 | case F_UNLCK: | 1765 | case F_UNLCK: |
1740 | return generic_delete_lease(filp); | 1766 | return generic_delete_lease(filp, *priv); |
1741 | case F_RDLCK: | 1767 | case F_RDLCK: |
1742 | case F_WRLCK: | 1768 | case F_WRLCK: |
1743 | if (!(*flp)->fl_lmops->lm_break) { | 1769 | if (!(*flp)->fl_lmops->lm_break) { |
1744 | WARN_ON_ONCE(1); | 1770 | WARN_ON_ONCE(1); |
1745 | return -ENOLCK; | 1771 | return -ENOLCK; |
1746 | } | 1772 | } |
1773 | |||
1747 | return generic_add_lease(filp, arg, flp, priv); | 1774 | return generic_add_lease(filp, arg, flp, priv); |
1748 | default: | 1775 | default: |
1749 | return -EINVAL; | 1776 | return -EINVAL; |
@@ -1816,7 +1843,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) | |||
1816 | int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | 1843 | int fcntl_setlease(unsigned int fd, struct file *filp, long arg) |
1817 | { | 1844 | { |
1818 | if (arg == F_UNLCK) | 1845 | if (arg == F_UNLCK) |
1819 | return vfs_setlease(filp, F_UNLCK, NULL, NULL); | 1846 | return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp); |
1820 | return do_fcntl_add_lease(fd, filp, arg); | 1847 | return do_fcntl_add_lease(fd, filp, arg); |
1821 | } | 1848 | } |
1822 | 1849 | ||
@@ -2171,7 +2198,7 @@ again: | |||
2171 | */ | 2198 | */ |
2172 | /* | 2199 | /* |
2173 | * we need that spin_lock here - it prevents reordering between | 2200 | * we need that spin_lock here - it prevents reordering between |
2174 | * update of inode->i_flock and check for it done in close(). | 2201 | * update of i_flctx->flc_posix and check for it done in close(). |
2175 | * rcu_read_lock() wouldn't do. | 2202 | * rcu_read_lock() wouldn't do. |
2176 | */ | 2203 | */ |
2177 | spin_lock(¤t->files->file_lock); | 2204 | spin_lock(¤t->files->file_lock); |
@@ -2331,13 +2358,14 @@ out: | |||
2331 | void locks_remove_posix(struct file *filp, fl_owner_t owner) | 2358 | void locks_remove_posix(struct file *filp, fl_owner_t owner) |
2332 | { | 2359 | { |
2333 | struct file_lock lock; | 2360 | struct file_lock lock; |
2361 | struct file_lock_context *ctx = file_inode(filp)->i_flctx; | ||
2334 | 2362 | ||
2335 | /* | 2363 | /* |
2336 | * If there are no locks held on this file, we don't need to call | 2364 | * If there are no locks held on this file, we don't need to call |
2337 | * posix_lock_file(). Another process could be setting a lock on this | 2365 | * posix_lock_file(). Another process could be setting a lock on this |
2338 | * file at the same time, but we wouldn't remove that lock anyway. | 2366 | * file at the same time, but we wouldn't remove that lock anyway. |
2339 | */ | 2367 | */ |
2340 | if (!file_inode(filp)->i_flock) | 2368 | if (!ctx || list_empty(&ctx->flc_posix)) |
2341 | return; | 2369 | return; |
2342 | 2370 | ||
2343 | lock.fl_type = F_UNLCK; | 2371 | lock.fl_type = F_UNLCK; |
@@ -2358,67 +2386,68 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) | |||
2358 | 2386 | ||
2359 | EXPORT_SYMBOL(locks_remove_posix); | 2387 | EXPORT_SYMBOL(locks_remove_posix); |
2360 | 2388 | ||
2389 | /* The i_flctx must be valid when calling into here */ | ||
2390 | static void | ||
2391 | locks_remove_flock(struct file *filp) | ||
2392 | { | ||
2393 | struct file_lock fl = { | ||
2394 | .fl_owner = filp, | ||
2395 | .fl_pid = current->tgid, | ||
2396 | .fl_file = filp, | ||
2397 | .fl_flags = FL_FLOCK, | ||
2398 | .fl_type = F_UNLCK, | ||
2399 | .fl_end = OFFSET_MAX, | ||
2400 | }; | ||
2401 | struct file_lock_context *flctx = file_inode(filp)->i_flctx; | ||
2402 | |||
2403 | if (list_empty(&flctx->flc_flock)) | ||
2404 | return; | ||
2405 | |||
2406 | if (filp->f_op->flock) | ||
2407 | filp->f_op->flock(filp, F_SETLKW, &fl); | ||
2408 | else | ||
2409 | flock_lock_file(filp, &fl); | ||
2410 | |||
2411 | if (fl.fl_ops && fl.fl_ops->fl_release_private) | ||
2412 | fl.fl_ops->fl_release_private(&fl); | ||
2413 | } | ||
2414 | |||
2415 | /* The i_flctx must be valid when calling into here */ | ||
2416 | static void | ||
2417 | locks_remove_lease(struct file *filp) | ||
2418 | { | ||
2419 | struct inode *inode = file_inode(filp); | ||
2420 | struct file_lock_context *ctx = inode->i_flctx; | ||
2421 | struct file_lock *fl, *tmp; | ||
2422 | LIST_HEAD(dispose); | ||
2423 | |||
2424 | if (list_empty(&ctx->flc_lease)) | ||
2425 | return; | ||
2426 | |||
2427 | spin_lock(&ctx->flc_lock); | ||
2428 | list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) | ||
2429 | if (filp == fl->fl_file) | ||
2430 | lease_modify(fl, F_UNLCK, &dispose); | ||
2431 | spin_unlock(&ctx->flc_lock); | ||
2432 | locks_dispose_list(&dispose); | ||
2433 | } | ||
2434 | |||
2361 | /* | 2435 | /* |
2362 | * This function is called on the last close of an open file. | 2436 | * This function is called on the last close of an open file. |
2363 | */ | 2437 | */ |
2364 | void locks_remove_file(struct file *filp) | 2438 | void locks_remove_file(struct file *filp) |
2365 | { | 2439 | { |
2366 | struct inode * inode = file_inode(filp); | 2440 | if (!file_inode(filp)->i_flctx) |
2367 | struct file_lock *fl; | ||
2368 | struct file_lock **before; | ||
2369 | LIST_HEAD(dispose); | ||
2370 | |||
2371 | if (!inode->i_flock) | ||
2372 | return; | 2441 | return; |
2373 | 2442 | ||
2443 | /* remove any OFD locks */ | ||
2374 | locks_remove_posix(filp, filp); | 2444 | locks_remove_posix(filp, filp); |
2375 | 2445 | ||
2376 | if (filp->f_op->flock) { | 2446 | /* remove flock locks */ |
2377 | struct file_lock fl = { | 2447 | locks_remove_flock(filp); |
2378 | .fl_owner = filp, | ||
2379 | .fl_pid = current->tgid, | ||
2380 | .fl_file = filp, | ||
2381 | .fl_flags = FL_FLOCK, | ||
2382 | .fl_type = F_UNLCK, | ||
2383 | .fl_end = OFFSET_MAX, | ||
2384 | }; | ||
2385 | filp->f_op->flock(filp, F_SETLKW, &fl); | ||
2386 | if (fl.fl_ops && fl.fl_ops->fl_release_private) | ||
2387 | fl.fl_ops->fl_release_private(&fl); | ||
2388 | } | ||
2389 | |||
2390 | spin_lock(&inode->i_lock); | ||
2391 | before = &inode->i_flock; | ||
2392 | 2448 | ||
2393 | while ((fl = *before) != NULL) { | 2449 | /* remove any leases */ |
2394 | if (fl->fl_file == filp) { | 2450 | locks_remove_lease(filp); |
2395 | if (IS_LEASE(fl)) { | ||
2396 | lease_modify(before, F_UNLCK, &dispose); | ||
2397 | continue; | ||
2398 | } | ||
2399 | |||
2400 | /* | ||
2401 | * There's a leftover lock on the list of a type that | ||
2402 | * we didn't expect to see. Most likely a classic | ||
2403 | * POSIX lock that ended up not getting released | ||
2404 | * properly, or that raced onto the list somehow. Log | ||
2405 | * some info about it and then just remove it from | ||
2406 | * the list. | ||
2407 | */ | ||
2408 | WARN(!IS_FLOCK(fl), | ||
2409 | "leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n", | ||
2410 | MAJOR(inode->i_sb->s_dev), | ||
2411 | MINOR(inode->i_sb->s_dev), inode->i_ino, | ||
2412 | fl->fl_type, fl->fl_flags, | ||
2413 | fl->fl_start, fl->fl_end); | ||
2414 | |||
2415 | locks_delete_lock(before, &dispose); | ||
2416 | continue; | ||
2417 | } | ||
2418 | before = &fl->fl_next; | ||
2419 | } | ||
2420 | spin_unlock(&inode->i_lock); | ||
2421 | locks_dispose_list(&dispose); | ||
2422 | } | 2451 | } |
2423 | 2452 | ||
2424 | /** | 2453 | /** |
@@ -2621,6 +2650,9 @@ static int __init filelock_init(void) | |||
2621 | { | 2650 | { |
2622 | int i; | 2651 | int i; |
2623 | 2652 | ||
2653 | flctx_cache = kmem_cache_create("file_lock_ctx", | ||
2654 | sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL); | ||
2655 | |||
2624 | filelock_cache = kmem_cache_create("file_lock_cache", | 2656 | filelock_cache = kmem_cache_create("file_lock_cache", |
2625 | sizeof(struct file_lock), 0, SLAB_PANIC, NULL); | 2657 | sizeof(struct file_lock), 0, SLAB_PANIC, NULL); |
2626 | 2658 | ||
diff --git a/fs/mount.h b/fs/mount.h index 0ad6f760ce52..6a61c2b3e385 100644 --- a/fs/mount.h +++ b/fs/mount.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #include <linux/seq_file.h> | 2 | #include <linux/seq_file.h> |
3 | #include <linux/poll.h> | 3 | #include <linux/poll.h> |
4 | #include <linux/ns_common.h> | 4 | #include <linux/ns_common.h> |
5 | #include <linux/fs_pin.h> | ||
5 | 6 | ||
6 | struct mnt_namespace { | 7 | struct mnt_namespace { |
7 | atomic_t count; | 8 | atomic_t count; |
@@ -62,7 +63,8 @@ struct mount { | |||
62 | int mnt_group_id; /* peer group identifier */ | 63 | int mnt_group_id; /* peer group identifier */ |
63 | int mnt_expiry_mark; /* true if marked for expiry */ | 64 | int mnt_expiry_mark; /* true if marked for expiry */ |
64 | struct hlist_head mnt_pins; | 65 | struct hlist_head mnt_pins; |
65 | struct path mnt_ex_mountpoint; | 66 | struct fs_pin mnt_umount; |
67 | struct dentry *mnt_ex_mountpoint; | ||
66 | }; | 68 | }; |
67 | 69 | ||
68 | #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ | 70 | #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ |
diff --git a/fs/namei.c b/fs/namei.c index bc35b02883bb..96ca11dea4a2 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -118,15 +118,6 @@ | |||
118 | * POSIX.1 2.4: an empty pathname is invalid (ENOENT). | 118 | * POSIX.1 2.4: an empty pathname is invalid (ENOENT). |
119 | * PATH_MAX includes the nul terminator --RR. | 119 | * PATH_MAX includes the nul terminator --RR. |
120 | */ | 120 | */ |
121 | void final_putname(struct filename *name) | ||
122 | { | ||
123 | if (name->separate) { | ||
124 | __putname(name->name); | ||
125 | kfree(name); | ||
126 | } else { | ||
127 | __putname(name); | ||
128 | } | ||
129 | } | ||
130 | 121 | ||
131 | #define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) | 122 | #define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) |
132 | 123 | ||
@@ -145,6 +136,7 @@ getname_flags(const char __user *filename, int flags, int *empty) | |||
145 | result = __getname(); | 136 | result = __getname(); |
146 | if (unlikely(!result)) | 137 | if (unlikely(!result)) |
147 | return ERR_PTR(-ENOMEM); | 138 | return ERR_PTR(-ENOMEM); |
139 | result->refcnt = 1; | ||
148 | 140 | ||
149 | /* | 141 | /* |
150 | * First, try to embed the struct filename inside the names_cache | 142 | * First, try to embed the struct filename inside the names_cache |
@@ -179,6 +171,7 @@ recopy: | |||
179 | } | 171 | } |
180 | result->name = kname; | 172 | result->name = kname; |
181 | result->separate = true; | 173 | result->separate = true; |
174 | result->refcnt = 1; | ||
182 | max = PATH_MAX; | 175 | max = PATH_MAX; |
183 | goto recopy; | 176 | goto recopy; |
184 | } | 177 | } |
@@ -202,7 +195,7 @@ recopy: | |||
202 | return result; | 195 | return result; |
203 | 196 | ||
204 | error: | 197 | error: |
205 | final_putname(result); | 198 | putname(result); |
206 | return err; | 199 | return err; |
207 | } | 200 | } |
208 | 201 | ||
@@ -212,43 +205,56 @@ getname(const char __user * filename) | |||
212 | return getname_flags(filename, 0, NULL); | 205 | return getname_flags(filename, 0, NULL); |
213 | } | 206 | } |
214 | 207 | ||
215 | /* | ||
216 | * The "getname_kernel()" interface doesn't do pathnames longer | ||
217 | * than EMBEDDED_NAME_MAX. Deal with it - you're a kernel user. | ||
218 | */ | ||
219 | struct filename * | 208 | struct filename * |
220 | getname_kernel(const char * filename) | 209 | getname_kernel(const char * filename) |
221 | { | 210 | { |
222 | struct filename *result; | 211 | struct filename *result; |
223 | char *kname; | 212 | int len = strlen(filename) + 1; |
224 | int len; | ||
225 | |||
226 | len = strlen(filename); | ||
227 | if (len >= EMBEDDED_NAME_MAX) | ||
228 | return ERR_PTR(-ENAMETOOLONG); | ||
229 | 213 | ||
230 | result = __getname(); | 214 | result = __getname(); |
231 | if (unlikely(!result)) | 215 | if (unlikely(!result)) |
232 | return ERR_PTR(-ENOMEM); | 216 | return ERR_PTR(-ENOMEM); |
233 | 217 | ||
234 | kname = (char *)result + sizeof(*result); | 218 | if (len <= EMBEDDED_NAME_MAX) { |
235 | result->name = kname; | 219 | result->name = (char *)(result) + sizeof(*result); |
220 | result->separate = false; | ||
221 | } else if (len <= PATH_MAX) { | ||
222 | struct filename *tmp; | ||
223 | |||
224 | tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); | ||
225 | if (unlikely(!tmp)) { | ||
226 | __putname(result); | ||
227 | return ERR_PTR(-ENOMEM); | ||
228 | } | ||
229 | tmp->name = (char *)result; | ||
230 | tmp->separate = true; | ||
231 | result = tmp; | ||
232 | } else { | ||
233 | __putname(result); | ||
234 | return ERR_PTR(-ENAMETOOLONG); | ||
235 | } | ||
236 | memcpy((char *)result->name, filename, len); | ||
236 | result->uptr = NULL; | 237 | result->uptr = NULL; |
237 | result->aname = NULL; | 238 | result->aname = NULL; |
238 | result->separate = false; | 239 | result->refcnt = 1; |
240 | audit_getname(result); | ||
239 | 241 | ||
240 | strlcpy(kname, filename, EMBEDDED_NAME_MAX); | ||
241 | return result; | 242 | return result; |
242 | } | 243 | } |
243 | 244 | ||
244 | #ifdef CONFIG_AUDITSYSCALL | ||
245 | void putname(struct filename *name) | 245 | void putname(struct filename *name) |
246 | { | 246 | { |
247 | if (unlikely(!audit_dummy_context())) | 247 | BUG_ON(name->refcnt <= 0); |
248 | return audit_putname(name); | 248 | |
249 | final_putname(name); | 249 | if (--name->refcnt > 0) |
250 | return; | ||
251 | |||
252 | if (name->separate) { | ||
253 | __putname(name->name); | ||
254 | kfree(name); | ||
255 | } else | ||
256 | __putname(name); | ||
250 | } | 257 | } |
251 | #endif | ||
252 | 258 | ||
253 | static int check_acl(struct inode *inode, int mask) | 259 | static int check_acl(struct inode *inode, int mask) |
254 | { | 260 | { |
@@ -2036,31 +2042,47 @@ static int filename_lookup(int dfd, struct filename *name, | |||
2036 | static int do_path_lookup(int dfd, const char *name, | 2042 | static int do_path_lookup(int dfd, const char *name, |
2037 | unsigned int flags, struct nameidata *nd) | 2043 | unsigned int flags, struct nameidata *nd) |
2038 | { | 2044 | { |
2039 | struct filename filename = { .name = name }; | 2045 | struct filename *filename = getname_kernel(name); |
2046 | int retval = PTR_ERR(filename); | ||
2040 | 2047 | ||
2041 | return filename_lookup(dfd, &filename, flags, nd); | 2048 | if (!IS_ERR(filename)) { |
2049 | retval = filename_lookup(dfd, filename, flags, nd); | ||
2050 | putname(filename); | ||
2051 | } | ||
2052 | return retval; | ||
2042 | } | 2053 | } |
2043 | 2054 | ||
2044 | /* does lookup, returns the object with parent locked */ | 2055 | /* does lookup, returns the object with parent locked */ |
2045 | struct dentry *kern_path_locked(const char *name, struct path *path) | 2056 | struct dentry *kern_path_locked(const char *name, struct path *path) |
2046 | { | 2057 | { |
2058 | struct filename *filename = getname_kernel(name); | ||
2047 | struct nameidata nd; | 2059 | struct nameidata nd; |
2048 | struct dentry *d; | 2060 | struct dentry *d; |
2049 | int err = do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, &nd); | 2061 | int err; |
2050 | if (err) | 2062 | |
2051 | return ERR_PTR(err); | 2063 | if (IS_ERR(filename)) |
2064 | return ERR_CAST(filename); | ||
2065 | |||
2066 | err = filename_lookup(AT_FDCWD, filename, LOOKUP_PARENT, &nd); | ||
2067 | if (err) { | ||
2068 | d = ERR_PTR(err); | ||
2069 | goto out; | ||
2070 | } | ||
2052 | if (nd.last_type != LAST_NORM) { | 2071 | if (nd.last_type != LAST_NORM) { |
2053 | path_put(&nd.path); | 2072 | path_put(&nd.path); |
2054 | return ERR_PTR(-EINVAL); | 2073 | d = ERR_PTR(-EINVAL); |
2074 | goto out; | ||
2055 | } | 2075 | } |
2056 | mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); | 2076 | mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); |
2057 | d = __lookup_hash(&nd.last, nd.path.dentry, 0); | 2077 | d = __lookup_hash(&nd.last, nd.path.dentry, 0); |
2058 | if (IS_ERR(d)) { | 2078 | if (IS_ERR(d)) { |
2059 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); | 2079 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); |
2060 | path_put(&nd.path); | 2080 | path_put(&nd.path); |
2061 | return d; | 2081 | goto out; |
2062 | } | 2082 | } |
2063 | *path = nd.path; | 2083 | *path = nd.path; |
2084 | out: | ||
2085 | putname(filename); | ||
2064 | return d; | 2086 | return d; |
2065 | } | 2087 | } |
2066 | 2088 | ||
@@ -2351,13 +2373,17 @@ static int | |||
2351 | filename_mountpoint(int dfd, struct filename *s, struct path *path, | 2373 | filename_mountpoint(int dfd, struct filename *s, struct path *path, |
2352 | unsigned int flags) | 2374 | unsigned int flags) |
2353 | { | 2375 | { |
2354 | int error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU); | 2376 | int error; |
2377 | if (IS_ERR(s)) | ||
2378 | return PTR_ERR(s); | ||
2379 | error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU); | ||
2355 | if (unlikely(error == -ECHILD)) | 2380 | if (unlikely(error == -ECHILD)) |
2356 | error = path_mountpoint(dfd, s->name, path, flags); | 2381 | error = path_mountpoint(dfd, s->name, path, flags); |
2357 | if (unlikely(error == -ESTALE)) | 2382 | if (unlikely(error == -ESTALE)) |
2358 | error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL); | 2383 | error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL); |
2359 | if (likely(!error)) | 2384 | if (likely(!error)) |
2360 | audit_inode(s, path->dentry, 0); | 2385 | audit_inode(s, path->dentry, 0); |
2386 | putname(s); | ||
2361 | return error; | 2387 | return error; |
2362 | } | 2388 | } |
2363 | 2389 | ||
@@ -2379,21 +2405,14 @@ int | |||
2379 | user_path_mountpoint_at(int dfd, const char __user *name, unsigned int flags, | 2405 | user_path_mountpoint_at(int dfd, const char __user *name, unsigned int flags, |
2380 | struct path *path) | 2406 | struct path *path) |
2381 | { | 2407 | { |
2382 | struct filename *s = getname(name); | 2408 | return filename_mountpoint(dfd, getname(name), path, flags); |
2383 | int error; | ||
2384 | if (IS_ERR(s)) | ||
2385 | return PTR_ERR(s); | ||
2386 | error = filename_mountpoint(dfd, s, path, flags); | ||
2387 | putname(s); | ||
2388 | return error; | ||
2389 | } | 2409 | } |
2390 | 2410 | ||
2391 | int | 2411 | int |
2392 | kern_path_mountpoint(int dfd, const char *name, struct path *path, | 2412 | kern_path_mountpoint(int dfd, const char *name, struct path *path, |
2393 | unsigned int flags) | 2413 | unsigned int flags) |
2394 | { | 2414 | { |
2395 | struct filename s = {.name = name}; | 2415 | return filename_mountpoint(dfd, getname_kernel(name), path, flags); |
2396 | return filename_mountpoint(dfd, &s, path, flags); | ||
2397 | } | 2416 | } |
2398 | EXPORT_SYMBOL(kern_path_mountpoint); | 2417 | EXPORT_SYMBOL(kern_path_mountpoint); |
2399 | 2418 | ||
@@ -3273,7 +3292,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, | |||
3273 | { | 3292 | { |
3274 | struct nameidata nd; | 3293 | struct nameidata nd; |
3275 | struct file *file; | 3294 | struct file *file; |
3276 | struct filename filename = { .name = name }; | 3295 | struct filename *filename; |
3277 | int flags = op->lookup_flags | LOOKUP_ROOT; | 3296 | int flags = op->lookup_flags | LOOKUP_ROOT; |
3278 | 3297 | ||
3279 | nd.root.mnt = mnt; | 3298 | nd.root.mnt = mnt; |
@@ -3282,15 +3301,20 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, | |||
3282 | if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN) | 3301 | if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN) |
3283 | return ERR_PTR(-ELOOP); | 3302 | return ERR_PTR(-ELOOP); |
3284 | 3303 | ||
3285 | file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU); | 3304 | filename = getname_kernel(name); |
3305 | if (unlikely(IS_ERR(filename))) | ||
3306 | return ERR_CAST(filename); | ||
3307 | |||
3308 | file = path_openat(-1, filename, &nd, op, flags | LOOKUP_RCU); | ||
3286 | if (unlikely(file == ERR_PTR(-ECHILD))) | 3309 | if (unlikely(file == ERR_PTR(-ECHILD))) |
3287 | file = path_openat(-1, &filename, &nd, op, flags); | 3310 | file = path_openat(-1, filename, &nd, op, flags); |
3288 | if (unlikely(file == ERR_PTR(-ESTALE))) | 3311 | if (unlikely(file == ERR_PTR(-ESTALE))) |
3289 | file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_REVAL); | 3312 | file = path_openat(-1, filename, &nd, op, flags | LOOKUP_REVAL); |
3313 | putname(filename); | ||
3290 | return file; | 3314 | return file; |
3291 | } | 3315 | } |
3292 | 3316 | ||
3293 | struct dentry *kern_path_create(int dfd, const char *pathname, | 3317 | static struct dentry *filename_create(int dfd, struct filename *name, |
3294 | struct path *path, unsigned int lookup_flags) | 3318 | struct path *path, unsigned int lookup_flags) |
3295 | { | 3319 | { |
3296 | struct dentry *dentry = ERR_PTR(-EEXIST); | 3320 | struct dentry *dentry = ERR_PTR(-EEXIST); |
@@ -3305,7 +3329,7 @@ struct dentry *kern_path_create(int dfd, const char *pathname, | |||
3305 | */ | 3329 | */ |
3306 | lookup_flags &= LOOKUP_REVAL; | 3330 | lookup_flags &= LOOKUP_REVAL; |
3307 | 3331 | ||
3308 | error = do_path_lookup(dfd, pathname, LOOKUP_PARENT|lookup_flags, &nd); | 3332 | error = filename_lookup(dfd, name, LOOKUP_PARENT|lookup_flags, &nd); |
3309 | if (error) | 3333 | if (error) |
3310 | return ERR_PTR(error); | 3334 | return ERR_PTR(error); |
3311 | 3335 | ||
@@ -3359,6 +3383,19 @@ out: | |||
3359 | path_put(&nd.path); | 3383 | path_put(&nd.path); |
3360 | return dentry; | 3384 | return dentry; |
3361 | } | 3385 | } |
3386 | |||
3387 | struct dentry *kern_path_create(int dfd, const char *pathname, | ||
3388 | struct path *path, unsigned int lookup_flags) | ||
3389 | { | ||
3390 | struct filename *filename = getname_kernel(pathname); | ||
3391 | struct dentry *res; | ||
3392 | |||
3393 | if (IS_ERR(filename)) | ||
3394 | return ERR_CAST(filename); | ||
3395 | res = filename_create(dfd, filename, path, lookup_flags); | ||
3396 | putname(filename); | ||
3397 | return res; | ||
3398 | } | ||
3362 | EXPORT_SYMBOL(kern_path_create); | 3399 | EXPORT_SYMBOL(kern_path_create); |
3363 | 3400 | ||
3364 | void done_path_create(struct path *path, struct dentry *dentry) | 3401 | void done_path_create(struct path *path, struct dentry *dentry) |
@@ -3377,7 +3414,7 @@ struct dentry *user_path_create(int dfd, const char __user *pathname, | |||
3377 | struct dentry *res; | 3414 | struct dentry *res; |
3378 | if (IS_ERR(tmp)) | 3415 | if (IS_ERR(tmp)) |
3379 | return ERR_CAST(tmp); | 3416 | return ERR_CAST(tmp); |
3380 | res = kern_path_create(dfd, tmp->name, path, lookup_flags); | 3417 | res = filename_create(dfd, tmp, path, lookup_flags); |
3381 | putname(tmp); | 3418 | putname(tmp); |
3382 | return res; | 3419 | return res; |
3383 | } | 3420 | } |
diff --git a/fs/namespace.c b/fs/namespace.c index cd1e9681a0cf..72a286e0d33e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -190,6 +190,14 @@ unsigned int mnt_get_count(struct mount *mnt) | |||
190 | #endif | 190 | #endif |
191 | } | 191 | } |
192 | 192 | ||
193 | static void drop_mountpoint(struct fs_pin *p) | ||
194 | { | ||
195 | struct mount *m = container_of(p, struct mount, mnt_umount); | ||
196 | dput(m->mnt_ex_mountpoint); | ||
197 | pin_remove(p); | ||
198 | mntput(&m->mnt); | ||
199 | } | ||
200 | |||
193 | static struct mount *alloc_vfsmnt(const char *name) | 201 | static struct mount *alloc_vfsmnt(const char *name) |
194 | { | 202 | { |
195 | struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); | 203 | struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); |
@@ -201,7 +209,7 @@ static struct mount *alloc_vfsmnt(const char *name) | |||
201 | goto out_free_cache; | 209 | goto out_free_cache; |
202 | 210 | ||
203 | if (name) { | 211 | if (name) { |
204 | mnt->mnt_devname = kstrdup(name, GFP_KERNEL); | 212 | mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL); |
205 | if (!mnt->mnt_devname) | 213 | if (!mnt->mnt_devname) |
206 | goto out_free_id; | 214 | goto out_free_id; |
207 | } | 215 | } |
@@ -229,12 +237,13 @@ static struct mount *alloc_vfsmnt(const char *name) | |||
229 | #ifdef CONFIG_FSNOTIFY | 237 | #ifdef CONFIG_FSNOTIFY |
230 | INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); | 238 | INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); |
231 | #endif | 239 | #endif |
240 | init_fs_pin(&mnt->mnt_umount, drop_mountpoint); | ||
232 | } | 241 | } |
233 | return mnt; | 242 | return mnt; |
234 | 243 | ||
235 | #ifdef CONFIG_SMP | 244 | #ifdef CONFIG_SMP |
236 | out_free_devname: | 245 | out_free_devname: |
237 | kfree(mnt->mnt_devname); | 246 | kfree_const(mnt->mnt_devname); |
238 | #endif | 247 | #endif |
239 | out_free_id: | 248 | out_free_id: |
240 | mnt_free_id(mnt); | 249 | mnt_free_id(mnt); |
@@ -568,7 +577,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) | |||
568 | 577 | ||
569 | static void free_vfsmnt(struct mount *mnt) | 578 | static void free_vfsmnt(struct mount *mnt) |
570 | { | 579 | { |
571 | kfree(mnt->mnt_devname); | 580 | kfree_const(mnt->mnt_devname); |
572 | #ifdef CONFIG_SMP | 581 | #ifdef CONFIG_SMP |
573 | free_percpu(mnt->mnt_pcp); | 582 | free_percpu(mnt->mnt_pcp); |
574 | #endif | 583 | #endif |
@@ -1289,7 +1298,6 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */ | |||
1289 | 1298 | ||
1290 | static void namespace_unlock(void) | 1299 | static void namespace_unlock(void) |
1291 | { | 1300 | { |
1292 | struct mount *mnt; | ||
1293 | struct hlist_head head = unmounted; | 1301 | struct hlist_head head = unmounted; |
1294 | 1302 | ||
1295 | if (likely(hlist_empty(&head))) { | 1303 | if (likely(hlist_empty(&head))) { |
@@ -1299,23 +1307,11 @@ static void namespace_unlock(void) | |||
1299 | 1307 | ||
1300 | head.first->pprev = &head.first; | 1308 | head.first->pprev = &head.first; |
1301 | INIT_HLIST_HEAD(&unmounted); | 1309 | INIT_HLIST_HEAD(&unmounted); |
1302 | |||
1303 | /* undo decrements we'd done in umount_tree() */ | ||
1304 | hlist_for_each_entry(mnt, &head, mnt_hash) | ||
1305 | if (mnt->mnt_ex_mountpoint.mnt) | ||
1306 | mntget(mnt->mnt_ex_mountpoint.mnt); | ||
1307 | |||
1308 | up_write(&namespace_sem); | 1310 | up_write(&namespace_sem); |
1309 | 1311 | ||
1310 | synchronize_rcu(); | 1312 | synchronize_rcu(); |
1311 | 1313 | ||
1312 | while (!hlist_empty(&head)) { | 1314 | group_pin_kill(&head); |
1313 | mnt = hlist_entry(head.first, struct mount, mnt_hash); | ||
1314 | hlist_del_init(&mnt->mnt_hash); | ||
1315 | if (mnt->mnt_ex_mountpoint.mnt) | ||
1316 | path_put(&mnt->mnt_ex_mountpoint); | ||
1317 | mntput(&mnt->mnt); | ||
1318 | } | ||
1319 | } | 1315 | } |
1320 | 1316 | ||
1321 | static inline void namespace_lock(void) | 1317 | static inline void namespace_lock(void) |
@@ -1334,7 +1330,6 @@ void umount_tree(struct mount *mnt, int how) | |||
1334 | { | 1330 | { |
1335 | HLIST_HEAD(tmp_list); | 1331 | HLIST_HEAD(tmp_list); |
1336 | struct mount *p; | 1332 | struct mount *p; |
1337 | struct mount *last = NULL; | ||
1338 | 1333 | ||
1339 | for (p = mnt; p; p = next_mnt(p, mnt)) { | 1334 | for (p = mnt; p; p = next_mnt(p, mnt)) { |
1340 | hlist_del_init_rcu(&p->mnt_hash); | 1335 | hlist_del_init_rcu(&p->mnt_hash); |
@@ -1347,33 +1342,28 @@ void umount_tree(struct mount *mnt, int how) | |||
1347 | if (how) | 1342 | if (how) |
1348 | propagate_umount(&tmp_list); | 1343 | propagate_umount(&tmp_list); |
1349 | 1344 | ||
1350 | hlist_for_each_entry(p, &tmp_list, mnt_hash) { | 1345 | while (!hlist_empty(&tmp_list)) { |
1346 | p = hlist_entry(tmp_list.first, struct mount, mnt_hash); | ||
1347 | hlist_del_init_rcu(&p->mnt_hash); | ||
1351 | list_del_init(&p->mnt_expire); | 1348 | list_del_init(&p->mnt_expire); |
1352 | list_del_init(&p->mnt_list); | 1349 | list_del_init(&p->mnt_list); |
1353 | __touch_mnt_namespace(p->mnt_ns); | 1350 | __touch_mnt_namespace(p->mnt_ns); |
1354 | p->mnt_ns = NULL; | 1351 | p->mnt_ns = NULL; |
1355 | if (how < 2) | 1352 | if (how < 2) |
1356 | p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; | 1353 | p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; |
1354 | |||
1355 | pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted); | ||
1357 | if (mnt_has_parent(p)) { | 1356 | if (mnt_has_parent(p)) { |
1358 | hlist_del_init(&p->mnt_mp_list); | 1357 | hlist_del_init(&p->mnt_mp_list); |
1359 | put_mountpoint(p->mnt_mp); | 1358 | put_mountpoint(p->mnt_mp); |
1360 | mnt_add_count(p->mnt_parent, -1); | 1359 | mnt_add_count(p->mnt_parent, -1); |
1361 | /* move the reference to mountpoint into ->mnt_ex_mountpoint */ | 1360 | /* old mountpoint will be dropped when we can do that */ |
1362 | p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint; | 1361 | p->mnt_ex_mountpoint = p->mnt_mountpoint; |
1363 | p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt; | ||
1364 | p->mnt_mountpoint = p->mnt.mnt_root; | 1362 | p->mnt_mountpoint = p->mnt.mnt_root; |
1365 | p->mnt_parent = p; | 1363 | p->mnt_parent = p; |
1366 | p->mnt_mp = NULL; | 1364 | p->mnt_mp = NULL; |
1367 | } | 1365 | } |
1368 | change_mnt_propagation(p, MS_PRIVATE); | 1366 | change_mnt_propagation(p, MS_PRIVATE); |
1369 | last = p; | ||
1370 | } | ||
1371 | if (last) { | ||
1372 | last->mnt_hash.next = unmounted.first; | ||
1373 | if (unmounted.first) | ||
1374 | unmounted.first->pprev = &last->mnt_hash.next; | ||
1375 | unmounted.first = tmp_list.first; | ||
1376 | unmounted.first->pprev = &unmounted.first; | ||
1377 | } | 1367 | } |
1378 | } | 1368 | } |
1379 | 1369 | ||
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 008960101520..e7ca827d7694 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c | |||
@@ -77,6 +77,7 @@ static int ncp_hash_dentry(const struct dentry *, struct qstr *); | |||
77 | static int ncp_compare_dentry(const struct dentry *, const struct dentry *, | 77 | static int ncp_compare_dentry(const struct dentry *, const struct dentry *, |
78 | unsigned int, const char *, const struct qstr *); | 78 | unsigned int, const char *, const struct qstr *); |
79 | static int ncp_delete_dentry(const struct dentry *); | 79 | static int ncp_delete_dentry(const struct dentry *); |
80 | static void ncp_d_prune(struct dentry *dentry); | ||
80 | 81 | ||
81 | const struct dentry_operations ncp_dentry_operations = | 82 | const struct dentry_operations ncp_dentry_operations = |
82 | { | 83 | { |
@@ -84,6 +85,7 @@ const struct dentry_operations ncp_dentry_operations = | |||
84 | .d_hash = ncp_hash_dentry, | 85 | .d_hash = ncp_hash_dentry, |
85 | .d_compare = ncp_compare_dentry, | 86 | .d_compare = ncp_compare_dentry, |
86 | .d_delete = ncp_delete_dentry, | 87 | .d_delete = ncp_delete_dentry, |
88 | .d_prune = ncp_d_prune, | ||
87 | }; | 89 | }; |
88 | 90 | ||
89 | #define ncp_namespace(i) (NCP_SERVER(i)->name_space[NCP_FINFO(i)->volNumber]) | 91 | #define ncp_namespace(i) (NCP_SERVER(i)->name_space[NCP_FINFO(i)->volNumber]) |
@@ -384,42 +386,6 @@ finished: | |||
384 | return val; | 386 | return val; |
385 | } | 387 | } |
386 | 388 | ||
387 | static struct dentry * | ||
388 | ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) | ||
389 | { | ||
390 | struct dentry *dent = dentry; | ||
391 | |||
392 | if (d_validate(dent, parent)) { | ||
393 | if (dent->d_name.len <= NCP_MAXPATHLEN && | ||
394 | (unsigned long)dent->d_fsdata == fpos) { | ||
395 | if (!dent->d_inode) { | ||
396 | dput(dent); | ||
397 | dent = NULL; | ||
398 | } | ||
399 | return dent; | ||
400 | } | ||
401 | dput(dent); | ||
402 | } | ||
403 | |||
404 | /* If a pointer is invalid, we search the dentry. */ | ||
405 | spin_lock(&parent->d_lock); | ||
406 | list_for_each_entry(dent, &parent->d_subdirs, d_child) { | ||
407 | if ((unsigned long)dent->d_fsdata == fpos) { | ||
408 | if (dent->d_inode) | ||
409 | dget(dent); | ||
410 | else | ||
411 | dent = NULL; | ||
412 | spin_unlock(&parent->d_lock); | ||
413 | goto out; | ||
414 | } | ||
415 | } | ||
416 | spin_unlock(&parent->d_lock); | ||
417 | return NULL; | ||
418 | |||
419 | out: | ||
420 | return dent; | ||
421 | } | ||
422 | |||
423 | static time_t ncp_obtain_mtime(struct dentry *dentry) | 389 | static time_t ncp_obtain_mtime(struct dentry *dentry) |
424 | { | 390 | { |
425 | struct inode *inode = dentry->d_inode; | 391 | struct inode *inode = dentry->d_inode; |
@@ -435,6 +401,20 @@ static time_t ncp_obtain_mtime(struct dentry *dentry) | |||
435 | return ncp_date_dos2unix(i.modifyTime, i.modifyDate); | 401 | return ncp_date_dos2unix(i.modifyTime, i.modifyDate); |
436 | } | 402 | } |
437 | 403 | ||
404 | static inline void | ||
405 | ncp_invalidate_dircache_entries(struct dentry *parent) | ||
406 | { | ||
407 | struct ncp_server *server = NCP_SERVER(parent->d_inode); | ||
408 | struct dentry *dentry; | ||
409 | |||
410 | spin_lock(&parent->d_lock); | ||
411 | list_for_each_entry(dentry, &parent->d_subdirs, d_child) { | ||
412 | dentry->d_fsdata = NULL; | ||
413 | ncp_age_dentry(server, dentry); | ||
414 | } | ||
415 | spin_unlock(&parent->d_lock); | ||
416 | } | ||
417 | |||
438 | static int ncp_readdir(struct file *file, struct dir_context *ctx) | 418 | static int ncp_readdir(struct file *file, struct dir_context *ctx) |
439 | { | 419 | { |
440 | struct dentry *dentry = file->f_path.dentry; | 420 | struct dentry *dentry = file->f_path.dentry; |
@@ -500,10 +480,21 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx) | |||
500 | struct dentry *dent; | 480 | struct dentry *dent; |
501 | bool over; | 481 | bool over; |
502 | 482 | ||
503 | dent = ncp_dget_fpos(ctl.cache->dentry[ctl.idx], | 483 | spin_lock(&dentry->d_lock); |
504 | dentry, ctx->pos); | 484 | if (!(NCP_FINFO(inode)->flags & NCPI_DIR_CACHE)) { |
505 | if (!dent) | 485 | spin_unlock(&dentry->d_lock); |
486 | goto invalid_cache; | ||
487 | } | ||
488 | dent = ctl.cache->dentry[ctl.idx]; | ||
489 | if (unlikely(!lockref_get_not_dead(&dent->d_lockref))) { | ||
490 | spin_unlock(&dentry->d_lock); | ||
491 | goto invalid_cache; | ||
492 | } | ||
493 | spin_unlock(&dentry->d_lock); | ||
494 | if (!dent->d_inode) { | ||
495 | dput(dent); | ||
506 | goto invalid_cache; | 496 | goto invalid_cache; |
497 | } | ||
507 | over = !dir_emit(ctx, dent->d_name.name, | 498 | over = !dir_emit(ctx, dent->d_name.name, |
508 | dent->d_name.len, | 499 | dent->d_name.len, |
509 | dent->d_inode->i_ino, DT_UNKNOWN); | 500 | dent->d_inode->i_ino, DT_UNKNOWN); |
@@ -548,6 +539,9 @@ init_cache: | |||
548 | ctl.filled = 0; | 539 | ctl.filled = 0; |
549 | ctl.valid = 1; | 540 | ctl.valid = 1; |
550 | read_really: | 541 | read_really: |
542 | spin_lock(&dentry->d_lock); | ||
543 | NCP_FINFO(inode)->flags |= NCPI_DIR_CACHE; | ||
544 | spin_unlock(&dentry->d_lock); | ||
551 | if (ncp_is_server_root(inode)) { | 545 | if (ncp_is_server_root(inode)) { |
552 | ncp_read_volume_list(file, ctx, &ctl); | 546 | ncp_read_volume_list(file, ctx, &ctl); |
553 | } else { | 547 | } else { |
@@ -573,6 +567,13 @@ out: | |||
573 | return result; | 567 | return result; |
574 | } | 568 | } |
575 | 569 | ||
570 | static void ncp_d_prune(struct dentry *dentry) | ||
571 | { | ||
572 | if (!dentry->d_fsdata) /* not referenced from page cache */ | ||
573 | return; | ||
574 | NCP_FINFO(dentry->d_parent->d_inode)->flags &= ~NCPI_DIR_CACHE; | ||
575 | } | ||
576 | |||
576 | static int | 577 | static int |
577 | ncp_fill_cache(struct file *file, struct dir_context *ctx, | 578 | ncp_fill_cache(struct file *file, struct dir_context *ctx, |
578 | struct ncp_cache_control *ctrl, struct ncp_entry_info *entry, | 579 | struct ncp_cache_control *ctrl, struct ncp_entry_info *entry, |
@@ -630,6 +631,10 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx, | |||
630 | d_instantiate(newdent, inode); | 631 | d_instantiate(newdent, inode); |
631 | if (!hashed) | 632 | if (!hashed) |
632 | d_rehash(newdent); | 633 | d_rehash(newdent); |
634 | } else { | ||
635 | spin_lock(&dentry->d_lock); | ||
636 | NCP_FINFO(inode)->flags &= ~NCPI_DIR_CACHE; | ||
637 | spin_unlock(&dentry->d_lock); | ||
633 | } | 638 | } |
634 | } else { | 639 | } else { |
635 | struct inode *inode = newdent->d_inode; | 640 | struct inode *inode = newdent->d_inode; |
@@ -639,12 +644,6 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx, | |||
639 | mutex_unlock(&inode->i_mutex); | 644 | mutex_unlock(&inode->i_mutex); |
640 | } | 645 | } |
641 | 646 | ||
642 | if (newdent->d_inode) { | ||
643 | ino = newdent->d_inode->i_ino; | ||
644 | newdent->d_fsdata = (void *) ctl.fpos; | ||
645 | ncp_new_dentry(newdent); | ||
646 | } | ||
647 | |||
648 | if (ctl.idx >= NCP_DIRCACHE_SIZE) { | 647 | if (ctl.idx >= NCP_DIRCACHE_SIZE) { |
649 | if (ctl.page) { | 648 | if (ctl.page) { |
650 | kunmap(ctl.page); | 649 | kunmap(ctl.page); |
@@ -660,8 +659,13 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx, | |||
660 | ctl.cache = kmap(ctl.page); | 659 | ctl.cache = kmap(ctl.page); |
661 | } | 660 | } |
662 | if (ctl.cache) { | 661 | if (ctl.cache) { |
663 | ctl.cache->dentry[ctl.idx] = newdent; | 662 | if (newdent->d_inode) { |
664 | valid = 1; | 663 | newdent->d_fsdata = newdent; |
664 | ctl.cache->dentry[ctl.idx] = newdent; | ||
665 | ino = newdent->d_inode->i_ino; | ||
666 | ncp_new_dentry(newdent); | ||
667 | } | ||
668 | valid = 1; | ||
665 | } | 669 | } |
666 | dput(newdent); | 670 | dput(newdent); |
667 | end_advance: | 671 | end_advance: |
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index e31e589369a4..01a9e16e9782 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
@@ -267,7 +267,6 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info) | |||
267 | if (inode) { | 267 | if (inode) { |
268 | atomic_set(&NCP_FINFO(inode)->opened, info->opened); | 268 | atomic_set(&NCP_FINFO(inode)->opened, info->opened); |
269 | 269 | ||
270 | inode->i_mapping->backing_dev_info = sb->s_bdi; | ||
271 | inode->i_ino = info->ino; | 270 | inode->i_ino = info->ino; |
272 | ncp_set_attr(inode, info); | 271 | ncp_set_attr(inode, info); |
273 | if (S_ISREG(inode->i_mode)) { | 272 | if (S_ISREG(inode->i_mode)) { |
@@ -560,7 +559,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
560 | server = NCP_SBP(sb); | 559 | server = NCP_SBP(sb); |
561 | memset(server, 0, sizeof(*server)); | 560 | memset(server, 0, sizeof(*server)); |
562 | 561 | ||
563 | error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY); | 562 | error = bdi_setup_and_register(&server->bdi, "ncpfs"); |
564 | if (error) | 563 | if (error) |
565 | goto out_fput; | 564 | goto out_fput; |
566 | 565 | ||
diff --git a/fs/ncpfs/ncp_fs_i.h b/fs/ncpfs/ncp_fs_i.h index 4b0bec477846..c4794504f843 100644 --- a/fs/ncpfs/ncp_fs_i.h +++ b/fs/ncpfs/ncp_fs_i.h | |||
@@ -22,6 +22,7 @@ struct ncp_inode_info { | |||
22 | int access; | 22 | int access; |
23 | int flags; | 23 | int flags; |
24 | #define NCPI_KLUDGE_SYMLINK 0x0001 | 24 | #define NCPI_KLUDGE_SYMLINK 0x0001 |
25 | #define NCPI_DIR_CACHE 0x0002 | ||
25 | __u8 file_handle[6]; | 26 | __u8 file_handle[6]; |
26 | struct inode vfs_inode; | 27 | struct inode vfs_inode; |
27 | }; | 28 | }; |
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index b785f74bfe3c..250e443a07f3 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h | |||
@@ -184,36 +184,6 @@ ncp_new_dentry(struct dentry* dentry) | |||
184 | dentry->d_time = jiffies; | 184 | dentry->d_time = jiffies; |
185 | } | 185 | } |
186 | 186 | ||
187 | static inline void | ||
188 | ncp_renew_dentries(struct dentry *parent) | ||
189 | { | ||
190 | struct ncp_server *server = NCP_SERVER(parent->d_inode); | ||
191 | struct dentry *dentry; | ||
192 | |||
193 | spin_lock(&parent->d_lock); | ||
194 | list_for_each_entry(dentry, &parent->d_subdirs, d_child) { | ||
195 | if (dentry->d_fsdata == NULL) | ||
196 | ncp_age_dentry(server, dentry); | ||
197 | else | ||
198 | ncp_new_dentry(dentry); | ||
199 | } | ||
200 | spin_unlock(&parent->d_lock); | ||
201 | } | ||
202 | |||
203 | static inline void | ||
204 | ncp_invalidate_dircache_entries(struct dentry *parent) | ||
205 | { | ||
206 | struct ncp_server *server = NCP_SERVER(parent->d_inode); | ||
207 | struct dentry *dentry; | ||
208 | |||
209 | spin_lock(&parent->d_lock); | ||
210 | list_for_each_entry(dentry, &parent->d_subdirs, d_child) { | ||
211 | dentry->d_fsdata = NULL; | ||
212 | ncp_age_dentry(server, dentry); | ||
213 | } | ||
214 | spin_unlock(&parent->d_lock); | ||
215 | } | ||
216 | |||
217 | struct ncp_cache_head { | 187 | struct ncp_cache_head { |
218 | time_t mtime; | 188 | time_t mtime; |
219 | unsigned long time; /* cache age */ | 189 | unsigned long time; /* cache age */ |
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 3dece03f2fc8..c7abc10279af 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -128,6 +128,11 @@ config PNFS_OBJLAYOUT | |||
128 | depends on NFS_V4_1 && SCSI_OSD_ULD | 128 | depends on NFS_V4_1 && SCSI_OSD_ULD |
129 | default NFS_V4 | 129 | default NFS_V4 |
130 | 130 | ||
131 | config PNFS_FLEXFILE_LAYOUT | ||
132 | tristate | ||
133 | depends on NFS_V4_1 && NFS_V3 | ||
134 | default m | ||
135 | |||
131 | config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN | 136 | config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN |
132 | string "NFSv4.1 Implementation ID Domain" | 137 | string "NFSv4.1 Implementation ID Domain" |
133 | depends on NFS_V4_1 | 138 | depends on NFS_V4_1 |
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 04cb830fa09f..1e987acf20c9 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
@@ -27,9 +27,10 @@ nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o | |||
27 | dns_resolve.o nfs4trace.o | 27 | dns_resolve.o nfs4trace.o |
28 | nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o | 28 | nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o |
29 | nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o | 29 | nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o |
30 | nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o | 30 | nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o pnfs_nfs.o |
31 | nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o | 31 | nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o |
32 | 32 | ||
33 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ | 33 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ |
34 | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ | 34 | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ |
35 | obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ | 35 | obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ |
36 | obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += flexfilelayout/ | ||
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 77fec6a55f57..1cac3c175d18 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -860,12 +860,14 @@ static const struct nfs_pageio_ops bl_pg_read_ops = { | |||
860 | .pg_init = bl_pg_init_read, | 860 | .pg_init = bl_pg_init_read, |
861 | .pg_test = bl_pg_test_read, | 861 | .pg_test = bl_pg_test_read, |
862 | .pg_doio = pnfs_generic_pg_readpages, | 862 | .pg_doio = pnfs_generic_pg_readpages, |
863 | .pg_cleanup = pnfs_generic_pg_cleanup, | ||
863 | }; | 864 | }; |
864 | 865 | ||
865 | static const struct nfs_pageio_ops bl_pg_write_ops = { | 866 | static const struct nfs_pageio_ops bl_pg_write_ops = { |
866 | .pg_init = bl_pg_init_write, | 867 | .pg_init = bl_pg_init_write, |
867 | .pg_test = bl_pg_test_write, | 868 | .pg_test = bl_pg_test_write, |
868 | .pg_doio = pnfs_generic_pg_writepages, | 869 | .pg_doio = pnfs_generic_pg_writepages, |
870 | .pg_cleanup = pnfs_generic_pg_cleanup, | ||
869 | }; | 871 | }; |
870 | 872 | ||
871 | static struct pnfs_layoutdriver_type blocklayout_type = { | 873 | static struct pnfs_layoutdriver_type blocklayout_type = { |
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index b8fb3a4ef649..351be9205bf8 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -128,22 +128,24 @@ nfs41_callback_svc(void *vrqstp) | |||
128 | if (try_to_freeze()) | 128 | if (try_to_freeze()) |
129 | continue; | 129 | continue; |
130 | 130 | ||
131 | prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); | 131 | prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_UNINTERRUPTIBLE); |
132 | spin_lock_bh(&serv->sv_cb_lock); | 132 | spin_lock_bh(&serv->sv_cb_lock); |
133 | if (!list_empty(&serv->sv_cb_list)) { | 133 | if (!list_empty(&serv->sv_cb_list)) { |
134 | req = list_first_entry(&serv->sv_cb_list, | 134 | req = list_first_entry(&serv->sv_cb_list, |
135 | struct rpc_rqst, rq_bc_list); | 135 | struct rpc_rqst, rq_bc_list); |
136 | list_del(&req->rq_bc_list); | 136 | list_del(&req->rq_bc_list); |
137 | spin_unlock_bh(&serv->sv_cb_lock); | 137 | spin_unlock_bh(&serv->sv_cb_lock); |
138 | finish_wait(&serv->sv_cb_waitq, &wq); | ||
138 | dprintk("Invoking bc_svc_process()\n"); | 139 | dprintk("Invoking bc_svc_process()\n"); |
139 | error = bc_svc_process(serv, req, rqstp); | 140 | error = bc_svc_process(serv, req, rqstp); |
140 | dprintk("bc_svc_process() returned w/ error code= %d\n", | 141 | dprintk("bc_svc_process() returned w/ error code= %d\n", |
141 | error); | 142 | error); |
142 | } else { | 143 | } else { |
143 | spin_unlock_bh(&serv->sv_cb_lock); | 144 | spin_unlock_bh(&serv->sv_cb_lock); |
144 | schedule(); | 145 | /* schedule_timeout to game the hung task watchdog */ |
146 | schedule_timeout(60 * HZ); | ||
147 | finish_wait(&serv->sv_cb_waitq, &wq); | ||
145 | } | 148 | } |
146 | finish_wait(&serv->sv_cb_waitq, &wq); | ||
147 | } | 149 | } |
148 | return 0; | 150 | return 0; |
149 | } | 151 | } |
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 7f3f60641344..da5433230bb1 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -85,25 +85,30 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ | |||
85 | { | 85 | { |
86 | struct inode *inode = state->inode; | 86 | struct inode *inode = state->inode; |
87 | struct file_lock *fl; | 87 | struct file_lock *fl; |
88 | struct file_lock_context *flctx = inode->i_flctx; | ||
89 | struct list_head *list; | ||
88 | int status = 0; | 90 | int status = 0; |
89 | 91 | ||
90 | if (inode->i_flock == NULL) | 92 | if (flctx == NULL) |
91 | goto out; | 93 | goto out; |
92 | 94 | ||
93 | /* Protect inode->i_flock using the i_lock */ | 95 | list = &flctx->flc_posix; |
94 | spin_lock(&inode->i_lock); | 96 | spin_lock(&flctx->flc_lock); |
95 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 97 | restart: |
96 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) | 98 | list_for_each_entry(fl, list, fl_list) { |
97 | continue; | ||
98 | if (nfs_file_open_context(fl->fl_file) != ctx) | 99 | if (nfs_file_open_context(fl->fl_file) != ctx) |
99 | continue; | 100 | continue; |
100 | spin_unlock(&inode->i_lock); | 101 | spin_unlock(&flctx->flc_lock); |
101 | status = nfs4_lock_delegation_recall(fl, state, stateid); | 102 | status = nfs4_lock_delegation_recall(fl, state, stateid); |
102 | if (status < 0) | 103 | if (status < 0) |
103 | goto out; | 104 | goto out; |
104 | spin_lock(&inode->i_lock); | 105 | spin_lock(&flctx->flc_lock); |
105 | } | 106 | } |
106 | spin_unlock(&inode->i_lock); | 107 | if (list == &flctx->flc_posix) { |
108 | list = &flctx->flc_flock; | ||
109 | goto restart; | ||
110 | } | ||
111 | spin_unlock(&flctx->flc_lock); | ||
107 | out: | 112 | out: |
108 | return status; | 113 | return status; |
109 | } | 114 | } |
@@ -301,6 +306,17 @@ nfs_inode_detach_delegation(struct inode *inode) | |||
301 | return nfs_detach_delegation(nfsi, delegation, server); | 306 | return nfs_detach_delegation(nfsi, delegation, server); |
302 | } | 307 | } |
303 | 308 | ||
309 | static void | ||
310 | nfs_update_inplace_delegation(struct nfs_delegation *delegation, | ||
311 | const struct nfs_delegation *update) | ||
312 | { | ||
313 | if (nfs4_stateid_is_newer(&update->stateid, &delegation->stateid)) { | ||
314 | delegation->stateid.seqid = update->stateid.seqid; | ||
315 | smp_wmb(); | ||
316 | delegation->type = update->type; | ||
317 | } | ||
318 | } | ||
319 | |||
304 | /** | 320 | /** |
305 | * nfs_inode_set_delegation - set up a delegation on an inode | 321 | * nfs_inode_set_delegation - set up a delegation on an inode |
306 | * @inode: inode to which delegation applies | 322 | * @inode: inode to which delegation applies |
@@ -334,9 +350,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct | |||
334 | old_delegation = rcu_dereference_protected(nfsi->delegation, | 350 | old_delegation = rcu_dereference_protected(nfsi->delegation, |
335 | lockdep_is_held(&clp->cl_lock)); | 351 | lockdep_is_held(&clp->cl_lock)); |
336 | if (old_delegation != NULL) { | 352 | if (old_delegation != NULL) { |
337 | if (nfs4_stateid_match(&delegation->stateid, | 353 | /* Is this an update of the existing delegation? */ |
338 | &old_delegation->stateid) && | 354 | if (nfs4_stateid_match_other(&old_delegation->stateid, |
339 | delegation->type == old_delegation->type) { | 355 | &delegation->stateid)) { |
356 | nfs_update_inplace_delegation(old_delegation, | ||
357 | delegation); | ||
358 | nfsi->delegation_state = old_delegation->type; | ||
340 | goto out; | 359 | goto out; |
341 | } | 360 | } |
342 | /* | 361 | /* |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 294692ff83b1..7077521acdf4 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -66,6 +66,10 @@ static struct kmem_cache *nfs_direct_cachep; | |||
66 | /* | 66 | /* |
67 | * This represents a set of asynchronous requests that we're waiting on | 67 | * This represents a set of asynchronous requests that we're waiting on |
68 | */ | 68 | */ |
69 | struct nfs_direct_mirror { | ||
70 | ssize_t count; | ||
71 | }; | ||
72 | |||
69 | struct nfs_direct_req { | 73 | struct nfs_direct_req { |
70 | struct kref kref; /* release manager */ | 74 | struct kref kref; /* release manager */ |
71 | 75 | ||
@@ -78,8 +82,13 @@ struct nfs_direct_req { | |||
78 | /* completion state */ | 82 | /* completion state */ |
79 | atomic_t io_count; /* i/os we're waiting for */ | 83 | atomic_t io_count; /* i/os we're waiting for */ |
80 | spinlock_t lock; /* protect completion state */ | 84 | spinlock_t lock; /* protect completion state */ |
85 | |||
86 | struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX]; | ||
87 | int mirror_count; | ||
88 | |||
81 | ssize_t count, /* bytes actually processed */ | 89 | ssize_t count, /* bytes actually processed */ |
82 | bytes_left, /* bytes left to be sent */ | 90 | bytes_left, /* bytes left to be sent */ |
91 | io_start, /* start of IO */ | ||
83 | error; /* any reported error */ | 92 | error; /* any reported error */ |
84 | struct completion completion; /* wait for i/o completion */ | 93 | struct completion completion; /* wait for i/o completion */ |
85 | 94 | ||
@@ -108,26 +117,56 @@ static inline int put_dreq(struct nfs_direct_req *dreq) | |||
108 | return atomic_dec_and_test(&dreq->io_count); | 117 | return atomic_dec_and_test(&dreq->io_count); |
109 | } | 118 | } |
110 | 119 | ||
120 | void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq) | ||
121 | { | ||
122 | dreq->flags = NFS_ODIRECT_RESCHED_WRITES; | ||
123 | } | ||
124 | EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes); | ||
125 | |||
126 | static void | ||
127 | nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) | ||
128 | { | ||
129 | int i; | ||
130 | ssize_t count; | ||
131 | |||
132 | WARN_ON_ONCE(hdr->pgio_mirror_idx >= dreq->mirror_count); | ||
133 | |||
134 | count = dreq->mirrors[hdr->pgio_mirror_idx].count; | ||
135 | if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) { | ||
136 | count = hdr->io_start + hdr->good_bytes - dreq->io_start; | ||
137 | dreq->mirrors[hdr->pgio_mirror_idx].count = count; | ||
138 | } | ||
139 | |||
140 | /* update the dreq->count by finding the minimum agreed count from all | ||
141 | * mirrors */ | ||
142 | count = dreq->mirrors[0].count; | ||
143 | |||
144 | for (i = 1; i < dreq->mirror_count; i++) | ||
145 | count = min(count, dreq->mirrors[i].count); | ||
146 | |||
147 | dreq->count = count; | ||
148 | } | ||
149 | |||
111 | /* | 150 | /* |
112 | * nfs_direct_select_verf - select the right verifier | 151 | * nfs_direct_select_verf - select the right verifier |
113 | * @dreq - direct request possibly spanning multiple servers | 152 | * @dreq - direct request possibly spanning multiple servers |
114 | * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs | 153 | * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs |
115 | * @ds_idx - index of data server in data server list, only valid if ds_clp set | 154 | * @commit_idx - commit bucket index for the DS |
116 | * | 155 | * |
117 | * returns the correct verifier to use given the role of the server | 156 | * returns the correct verifier to use given the role of the server |
118 | */ | 157 | */ |
119 | static struct nfs_writeverf * | 158 | static struct nfs_writeverf * |
120 | nfs_direct_select_verf(struct nfs_direct_req *dreq, | 159 | nfs_direct_select_verf(struct nfs_direct_req *dreq, |
121 | struct nfs_client *ds_clp, | 160 | struct nfs_client *ds_clp, |
122 | int ds_idx) | 161 | int commit_idx) |
123 | { | 162 | { |
124 | struct nfs_writeverf *verfp = &dreq->verf; | 163 | struct nfs_writeverf *verfp = &dreq->verf; |
125 | 164 | ||
126 | #ifdef CONFIG_NFS_V4_1 | 165 | #ifdef CONFIG_NFS_V4_1 |
127 | if (ds_clp) { | 166 | if (ds_clp) { |
128 | /* pNFS is in use, use the DS verf */ | 167 | /* pNFS is in use, use the DS verf */ |
129 | if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets) | 168 | if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets) |
130 | verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf; | 169 | verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf; |
131 | else | 170 | else |
132 | WARN_ON_ONCE(1); | 171 | WARN_ON_ONCE(1); |
133 | } | 172 | } |
@@ -148,8 +187,7 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, | |||
148 | { | 187 | { |
149 | struct nfs_writeverf *verfp; | 188 | struct nfs_writeverf *verfp; |
150 | 189 | ||
151 | verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, | 190 | verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx); |
152 | hdr->ds_idx); | ||
153 | WARN_ON_ONCE(verfp->committed >= 0); | 191 | WARN_ON_ONCE(verfp->committed >= 0); |
154 | memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); | 192 | memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); |
155 | WARN_ON_ONCE(verfp->committed < 0); | 193 | WARN_ON_ONCE(verfp->committed < 0); |
@@ -169,8 +207,7 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, | |||
169 | { | 207 | { |
170 | struct nfs_writeverf *verfp; | 208 | struct nfs_writeverf *verfp; |
171 | 209 | ||
172 | verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, | 210 | verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx); |
173 | hdr->ds_idx); | ||
174 | if (verfp->committed < 0) { | 211 | if (verfp->committed < 0) { |
175 | nfs_direct_set_hdr_verf(dreq, hdr); | 212 | nfs_direct_set_hdr_verf(dreq, hdr); |
176 | return 0; | 213 | return 0; |
@@ -193,7 +230,11 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, | |||
193 | 230 | ||
194 | verfp = nfs_direct_select_verf(dreq, data->ds_clp, | 231 | verfp = nfs_direct_select_verf(dreq, data->ds_clp, |
195 | data->ds_commit_index); | 232 | data->ds_commit_index); |
196 | WARN_ON_ONCE(verfp->committed < 0); | 233 | |
234 | /* verifier not set so always fail */ | ||
235 | if (verfp->committed < 0) | ||
236 | return 1; | ||
237 | |||
197 | return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); | 238 | return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); |
198 | } | 239 | } |
199 | 240 | ||
@@ -249,6 +290,18 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, | |||
249 | cinfo->completion_ops = &nfs_direct_commit_completion_ops; | 290 | cinfo->completion_ops = &nfs_direct_commit_completion_ops; |
250 | } | 291 | } |
251 | 292 | ||
293 | static inline void nfs_direct_setup_mirroring(struct nfs_direct_req *dreq, | ||
294 | struct nfs_pageio_descriptor *pgio, | ||
295 | struct nfs_page *req) | ||
296 | { | ||
297 | int mirror_count = 1; | ||
298 | |||
299 | if (pgio->pg_ops->pg_get_mirror_count) | ||
300 | mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); | ||
301 | |||
302 | dreq->mirror_count = mirror_count; | ||
303 | } | ||
304 | |||
252 | static inline struct nfs_direct_req *nfs_direct_req_alloc(void) | 305 | static inline struct nfs_direct_req *nfs_direct_req_alloc(void) |
253 | { | 306 | { |
254 | struct nfs_direct_req *dreq; | 307 | struct nfs_direct_req *dreq; |
@@ -263,6 +316,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) | |||
263 | INIT_LIST_HEAD(&dreq->mds_cinfo.list); | 316 | INIT_LIST_HEAD(&dreq->mds_cinfo.list); |
264 | dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */ | 317 | dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */ |
265 | INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); | 318 | INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); |
319 | dreq->mirror_count = 1; | ||
266 | spin_lock_init(&dreq->lock); | 320 | spin_lock_init(&dreq->lock); |
267 | 321 | ||
268 | return dreq; | 322 | return dreq; |
@@ -369,7 +423,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) | |||
369 | if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) | 423 | if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) |
370 | dreq->error = hdr->error; | 424 | dreq->error = hdr->error; |
371 | else | 425 | else |
372 | dreq->count += hdr->good_bytes; | 426 | nfs_direct_good_bytes(dreq, hdr); |
427 | |||
373 | spin_unlock(&dreq->lock); | 428 | spin_unlock(&dreq->lock); |
374 | 429 | ||
375 | while (!list_empty(&hdr->pages)) { | 430 | while (!list_empty(&hdr->pages)) { |
@@ -547,6 +602,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, | |||
547 | 602 | ||
548 | dreq->inode = inode; | 603 | dreq->inode = inode; |
549 | dreq->bytes_left = count; | 604 | dreq->bytes_left = count; |
605 | dreq->io_start = pos; | ||
550 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); | 606 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); |
551 | l_ctx = nfs_get_lock_context(dreq->ctx); | 607 | l_ctx = nfs_get_lock_context(dreq->ctx); |
552 | if (IS_ERR(l_ctx)) { | 608 | if (IS_ERR(l_ctx)) { |
@@ -579,6 +635,20 @@ out: | |||
579 | return result; | 635 | return result; |
580 | } | 636 | } |
581 | 637 | ||
638 | static void | ||
639 | nfs_direct_write_scan_commit_list(struct inode *inode, | ||
640 | struct list_head *list, | ||
641 | struct nfs_commit_info *cinfo) | ||
642 | { | ||
643 | spin_lock(cinfo->lock); | ||
644 | #ifdef CONFIG_NFS_V4_1 | ||
645 | if (cinfo->ds != NULL && cinfo->ds->nwritten != 0) | ||
646 | NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); | ||
647 | #endif | ||
648 | nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); | ||
649 | spin_unlock(cinfo->lock); | ||
650 | } | ||
651 | |||
582 | static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | 652 | static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) |
583 | { | 653 | { |
584 | struct nfs_pageio_descriptor desc; | 654 | struct nfs_pageio_descriptor desc; |
@@ -586,20 +656,23 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | |||
586 | LIST_HEAD(reqs); | 656 | LIST_HEAD(reqs); |
587 | struct nfs_commit_info cinfo; | 657 | struct nfs_commit_info cinfo; |
588 | LIST_HEAD(failed); | 658 | LIST_HEAD(failed); |
659 | int i; | ||
589 | 660 | ||
590 | nfs_init_cinfo_from_dreq(&cinfo, dreq); | 661 | nfs_init_cinfo_from_dreq(&cinfo, dreq); |
591 | pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo); | 662 | nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); |
592 | spin_lock(cinfo.lock); | ||
593 | nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0); | ||
594 | spin_unlock(cinfo.lock); | ||
595 | 663 | ||
596 | dreq->count = 0; | 664 | dreq->count = 0; |
665 | for (i = 0; i < dreq->mirror_count; i++) | ||
666 | dreq->mirrors[i].count = 0; | ||
597 | get_dreq(dreq); | 667 | get_dreq(dreq); |
598 | 668 | ||
599 | nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, | 669 | nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, |
600 | &nfs_direct_write_completion_ops); | 670 | &nfs_direct_write_completion_ops); |
601 | desc.pg_dreq = dreq; | 671 | desc.pg_dreq = dreq; |
602 | 672 | ||
673 | req = nfs_list_entry(reqs.next); | ||
674 | nfs_direct_setup_mirroring(dreq, &desc, req); | ||
675 | |||
603 | list_for_each_entry_safe(req, tmp, &reqs, wb_list) { | 676 | list_for_each_entry_safe(req, tmp, &reqs, wb_list) { |
604 | if (!nfs_pageio_add_request(&desc, req)) { | 677 | if (!nfs_pageio_add_request(&desc, req)) { |
605 | nfs_list_remove_request(req); | 678 | nfs_list_remove_request(req); |
@@ -646,7 +719,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) | |||
646 | nfs_list_remove_request(req); | 719 | nfs_list_remove_request(req); |
647 | if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { | 720 | if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { |
648 | /* Note the rewrite will go through mds */ | 721 | /* Note the rewrite will go through mds */ |
649 | nfs_mark_request_commit(req, NULL, &cinfo); | 722 | nfs_mark_request_commit(req, NULL, &cinfo, 0); |
650 | } else | 723 | } else |
651 | nfs_release_request(req); | 724 | nfs_release_request(req); |
652 | nfs_unlock_and_release_request(req); | 725 | nfs_unlock_and_release_request(req); |
@@ -721,7 +794,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) | |||
721 | dreq->error = hdr->error; | 794 | dreq->error = hdr->error; |
722 | } | 795 | } |
723 | if (dreq->error == 0) { | 796 | if (dreq->error == 0) { |
724 | dreq->count += hdr->good_bytes; | 797 | nfs_direct_good_bytes(dreq, hdr); |
725 | if (nfs_write_need_commit(hdr)) { | 798 | if (nfs_write_need_commit(hdr)) { |
726 | if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) | 799 | if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) |
727 | request_commit = true; | 800 | request_commit = true; |
@@ -745,7 +818,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) | |||
745 | nfs_list_remove_request(req); | 818 | nfs_list_remove_request(req); |
746 | if (request_commit) { | 819 | if (request_commit) { |
747 | kref_get(&req->wb_kref); | 820 | kref_get(&req->wb_kref); |
748 | nfs_mark_request_commit(req, hdr->lseg, &cinfo); | 821 | nfs_mark_request_commit(req, hdr->lseg, &cinfo, |
822 | hdr->ds_commit_idx); | ||
749 | } | 823 | } |
750 | nfs_unlock_and_release_request(req); | 824 | nfs_unlock_and_release_request(req); |
751 | } | 825 | } |
@@ -826,6 +900,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, | |||
826 | result = PTR_ERR(req); | 900 | result = PTR_ERR(req); |
827 | break; | 901 | break; |
828 | } | 902 | } |
903 | |||
904 | nfs_direct_setup_mirroring(dreq, &desc, req); | ||
905 | |||
829 | nfs_lock_request(req); | 906 | nfs_lock_request(req); |
830 | req->wb_index = pos >> PAGE_SHIFT; | 907 | req->wb_index = pos >> PAGE_SHIFT; |
831 | req->wb_offset = pos & ~PAGE_MASK; | 908 | req->wb_offset = pos & ~PAGE_MASK; |
@@ -934,6 +1011,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, | |||
934 | 1011 | ||
935 | dreq->inode = inode; | 1012 | dreq->inode = inode; |
936 | dreq->bytes_left = count; | 1013 | dreq->bytes_left = count; |
1014 | dreq->io_start = pos; | ||
937 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); | 1015 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); |
938 | l_ctx = nfs_get_lock_context(dreq->ctx); | 1016 | l_ctx = nfs_get_lock_context(dreq->ctx); |
939 | if (IS_ERR(l_ctx)) { | 1017 | if (IS_ERR(l_ctx)) { |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2ab6f00dba5b..94712fc781fa 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -646,7 +646,6 @@ static const struct vm_operations_struct nfs_file_vm_ops = { | |||
646 | .fault = filemap_fault, | 646 | .fault = filemap_fault, |
647 | .map_pages = filemap_map_pages, | 647 | .map_pages = filemap_map_pages, |
648 | .page_mkwrite = nfs_vm_page_mkwrite, | 648 | .page_mkwrite = nfs_vm_page_mkwrite, |
649 | .remap_pages = generic_file_remap_pages, | ||
650 | }; | 649 | }; |
651 | 650 | ||
652 | static int nfs_need_sync_write(struct file *filp, struct inode *inode) | 651 | static int nfs_need_sync_write(struct file *filp, struct inode *inode) |
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 7afb52f6a25a..7ae1c263c5cf 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c | |||
@@ -118,13 +118,6 @@ static void filelayout_reset_read(struct nfs_pgio_header *hdr) | |||
118 | } | 118 | } |
119 | } | 119 | } |
120 | 120 | ||
121 | static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) | ||
122 | { | ||
123 | if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) | ||
124 | return; | ||
125 | pnfs_return_layout(inode); | ||
126 | } | ||
127 | |||
128 | static int filelayout_async_handle_error(struct rpc_task *task, | 121 | static int filelayout_async_handle_error(struct rpc_task *task, |
129 | struct nfs4_state *state, | 122 | struct nfs4_state *state, |
130 | struct nfs_client *clp, | 123 | struct nfs_client *clp, |
@@ -207,7 +200,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, | |||
207 | dprintk("%s DS connection error %d\n", __func__, | 200 | dprintk("%s DS connection error %d\n", __func__, |
208 | task->tk_status); | 201 | task->tk_status); |
209 | nfs4_mark_deviceid_unavailable(devid); | 202 | nfs4_mark_deviceid_unavailable(devid); |
210 | set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); | 203 | pnfs_error_mark_layout_for_return(inode, lseg); |
211 | rpc_wake_up(&tbl->slot_tbl_waitq); | 204 | rpc_wake_up(&tbl->slot_tbl_waitq); |
212 | /* fall through */ | 205 | /* fall through */ |
213 | default: | 206 | default: |
@@ -339,16 +332,6 @@ static void filelayout_read_count_stats(struct rpc_task *task, void *data) | |||
339 | rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); | 332 | rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); |
340 | } | 333 | } |
341 | 334 | ||
342 | static void filelayout_read_release(void *data) | ||
343 | { | ||
344 | struct nfs_pgio_header *hdr = data; | ||
345 | struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; | ||
346 | |||
347 | filelayout_fenceme(lo->plh_inode, lo); | ||
348 | nfs_put_client(hdr->ds_clp); | ||
349 | hdr->mds_ops->rpc_release(data); | ||
350 | } | ||
351 | |||
352 | static int filelayout_write_done_cb(struct rpc_task *task, | 335 | static int filelayout_write_done_cb(struct rpc_task *task, |
353 | struct nfs_pgio_header *hdr) | 336 | struct nfs_pgio_header *hdr) |
354 | { | 337 | { |
@@ -371,17 +354,6 @@ static int filelayout_write_done_cb(struct rpc_task *task, | |||
371 | return 0; | 354 | return 0; |
372 | } | 355 | } |
373 | 356 | ||
374 | /* Fake up some data that will cause nfs_commit_release to retry the writes. */ | ||
375 | static void prepare_to_resend_writes(struct nfs_commit_data *data) | ||
376 | { | ||
377 | struct nfs_page *first = nfs_list_entry(data->pages.next); | ||
378 | |||
379 | data->task.tk_status = 0; | ||
380 | memcpy(&data->verf.verifier, &first->wb_verf, | ||
381 | sizeof(data->verf.verifier)); | ||
382 | data->verf.verifier.data[0]++; /* ensure verifier mismatch */ | ||
383 | } | ||
384 | |||
385 | static int filelayout_commit_done_cb(struct rpc_task *task, | 357 | static int filelayout_commit_done_cb(struct rpc_task *task, |
386 | struct nfs_commit_data *data) | 358 | struct nfs_commit_data *data) |
387 | { | 359 | { |
@@ -393,7 +365,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, | |||
393 | 365 | ||
394 | switch (err) { | 366 | switch (err) { |
395 | case -NFS4ERR_RESET_TO_MDS: | 367 | case -NFS4ERR_RESET_TO_MDS: |
396 | prepare_to_resend_writes(data); | 368 | pnfs_generic_prepare_to_resend_writes(data); |
397 | return -EAGAIN; | 369 | return -EAGAIN; |
398 | case -EAGAIN: | 370 | case -EAGAIN: |
399 | rpc_restart_call_prepare(task); | 371 | rpc_restart_call_prepare(task); |
@@ -451,16 +423,6 @@ static void filelayout_write_count_stats(struct rpc_task *task, void *data) | |||
451 | rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); | 423 | rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); |
452 | } | 424 | } |
453 | 425 | ||
454 | static void filelayout_write_release(void *data) | ||
455 | { | ||
456 | struct nfs_pgio_header *hdr = data; | ||
457 | struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; | ||
458 | |||
459 | filelayout_fenceme(lo->plh_inode, lo); | ||
460 | nfs_put_client(hdr->ds_clp); | ||
461 | hdr->mds_ops->rpc_release(data); | ||
462 | } | ||
463 | |||
464 | static void filelayout_commit_prepare(struct rpc_task *task, void *data) | 426 | static void filelayout_commit_prepare(struct rpc_task *task, void *data) |
465 | { | 427 | { |
466 | struct nfs_commit_data *wdata = data; | 428 | struct nfs_commit_data *wdata = data; |
@@ -471,14 +433,6 @@ static void filelayout_commit_prepare(struct rpc_task *task, void *data) | |||
471 | task); | 433 | task); |
472 | } | 434 | } |
473 | 435 | ||
474 | static void filelayout_write_commit_done(struct rpc_task *task, void *data) | ||
475 | { | ||
476 | struct nfs_commit_data *wdata = data; | ||
477 | |||
478 | /* Note this may cause RPC to be resent */ | ||
479 | wdata->mds_ops->rpc_call_done(task, data); | ||
480 | } | ||
481 | |||
482 | static void filelayout_commit_count_stats(struct rpc_task *task, void *data) | 436 | static void filelayout_commit_count_stats(struct rpc_task *task, void *data) |
483 | { | 437 | { |
484 | struct nfs_commit_data *cdata = data; | 438 | struct nfs_commit_data *cdata = data; |
@@ -486,35 +440,25 @@ static void filelayout_commit_count_stats(struct rpc_task *task, void *data) | |||
486 | rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics); | 440 | rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics); |
487 | } | 441 | } |
488 | 442 | ||
489 | static void filelayout_commit_release(void *calldata) | ||
490 | { | ||
491 | struct nfs_commit_data *data = calldata; | ||
492 | |||
493 | data->completion_ops->completion(data); | ||
494 | pnfs_put_lseg(data->lseg); | ||
495 | nfs_put_client(data->ds_clp); | ||
496 | nfs_commitdata_release(data); | ||
497 | } | ||
498 | |||
499 | static const struct rpc_call_ops filelayout_read_call_ops = { | 443 | static const struct rpc_call_ops filelayout_read_call_ops = { |
500 | .rpc_call_prepare = filelayout_read_prepare, | 444 | .rpc_call_prepare = filelayout_read_prepare, |
501 | .rpc_call_done = filelayout_read_call_done, | 445 | .rpc_call_done = filelayout_read_call_done, |
502 | .rpc_count_stats = filelayout_read_count_stats, | 446 | .rpc_count_stats = filelayout_read_count_stats, |
503 | .rpc_release = filelayout_read_release, | 447 | .rpc_release = pnfs_generic_rw_release, |
504 | }; | 448 | }; |
505 | 449 | ||
506 | static const struct rpc_call_ops filelayout_write_call_ops = { | 450 | static const struct rpc_call_ops filelayout_write_call_ops = { |
507 | .rpc_call_prepare = filelayout_write_prepare, | 451 | .rpc_call_prepare = filelayout_write_prepare, |
508 | .rpc_call_done = filelayout_write_call_done, | 452 | .rpc_call_done = filelayout_write_call_done, |
509 | .rpc_count_stats = filelayout_write_count_stats, | 453 | .rpc_count_stats = filelayout_write_count_stats, |
510 | .rpc_release = filelayout_write_release, | 454 | .rpc_release = pnfs_generic_rw_release, |
511 | }; | 455 | }; |
512 | 456 | ||
513 | static const struct rpc_call_ops filelayout_commit_call_ops = { | 457 | static const struct rpc_call_ops filelayout_commit_call_ops = { |
514 | .rpc_call_prepare = filelayout_commit_prepare, | 458 | .rpc_call_prepare = filelayout_commit_prepare, |
515 | .rpc_call_done = filelayout_write_commit_done, | 459 | .rpc_call_done = pnfs_generic_write_commit_done, |
516 | .rpc_count_stats = filelayout_commit_count_stats, | 460 | .rpc_count_stats = filelayout_commit_count_stats, |
517 | .rpc_release = filelayout_commit_release, | 461 | .rpc_release = pnfs_generic_commit_release, |
518 | }; | 462 | }; |
519 | 463 | ||
520 | static enum pnfs_try_status | 464 | static enum pnfs_try_status |
@@ -548,7 +492,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) | |||
548 | /* No multipath support. Use first DS */ | 492 | /* No multipath support. Use first DS */ |
549 | atomic_inc(&ds->ds_clp->cl_count); | 493 | atomic_inc(&ds->ds_clp->cl_count); |
550 | hdr->ds_clp = ds->ds_clp; | 494 | hdr->ds_clp = ds->ds_clp; |
551 | hdr->ds_idx = idx; | 495 | hdr->ds_commit_idx = idx; |
552 | fh = nfs4_fl_select_ds_fh(lseg, j); | 496 | fh = nfs4_fl_select_ds_fh(lseg, j); |
553 | if (fh) | 497 | if (fh) |
554 | hdr->args.fh = fh; | 498 | hdr->args.fh = fh; |
@@ -557,8 +501,9 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) | |||
557 | hdr->mds_offset = offset; | 501 | hdr->mds_offset = offset; |
558 | 502 | ||
559 | /* Perform an asynchronous read to ds */ | 503 | /* Perform an asynchronous read to ds */ |
560 | nfs_initiate_pgio(ds_clnt, hdr, | 504 | nfs_initiate_pgio(ds_clnt, hdr, hdr->cred, |
561 | &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); | 505 | NFS_PROTO(hdr->inode), &filelayout_read_call_ops, |
506 | 0, RPC_TASK_SOFTCONN); | ||
562 | return PNFS_ATTEMPTED; | 507 | return PNFS_ATTEMPTED; |
563 | } | 508 | } |
564 | 509 | ||
@@ -591,16 +536,16 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) | |||
591 | hdr->pgio_done_cb = filelayout_write_done_cb; | 536 | hdr->pgio_done_cb = filelayout_write_done_cb; |
592 | atomic_inc(&ds->ds_clp->cl_count); | 537 | atomic_inc(&ds->ds_clp->cl_count); |
593 | hdr->ds_clp = ds->ds_clp; | 538 | hdr->ds_clp = ds->ds_clp; |
594 | hdr->ds_idx = idx; | 539 | hdr->ds_commit_idx = idx; |
595 | fh = nfs4_fl_select_ds_fh(lseg, j); | 540 | fh = nfs4_fl_select_ds_fh(lseg, j); |
596 | if (fh) | 541 | if (fh) |
597 | hdr->args.fh = fh; | 542 | hdr->args.fh = fh; |
598 | hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); | 543 | hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); |
599 | 544 | ||
600 | /* Perform an asynchronous write */ | 545 | /* Perform an asynchronous write */ |
601 | nfs_initiate_pgio(ds_clnt, hdr, | 546 | nfs_initiate_pgio(ds_clnt, hdr, hdr->cred, |
602 | &filelayout_write_call_ops, sync, | 547 | NFS_PROTO(hdr->inode), &filelayout_write_call_ops, |
603 | RPC_TASK_SOFTCONN); | 548 | sync, RPC_TASK_SOFTCONN); |
604 | return PNFS_ATTEMPTED; | 549 | return PNFS_ATTEMPTED; |
605 | } | 550 | } |
606 | 551 | ||
@@ -988,12 +933,14 @@ static const struct nfs_pageio_ops filelayout_pg_read_ops = { | |||
988 | .pg_init = filelayout_pg_init_read, | 933 | .pg_init = filelayout_pg_init_read, |
989 | .pg_test = filelayout_pg_test, | 934 | .pg_test = filelayout_pg_test, |
990 | .pg_doio = pnfs_generic_pg_readpages, | 935 | .pg_doio = pnfs_generic_pg_readpages, |
936 | .pg_cleanup = pnfs_generic_pg_cleanup, | ||
991 | }; | 937 | }; |
992 | 938 | ||
993 | static const struct nfs_pageio_ops filelayout_pg_write_ops = { | 939 | static const struct nfs_pageio_ops filelayout_pg_write_ops = { |
994 | .pg_init = filelayout_pg_init_write, | 940 | .pg_init = filelayout_pg_init_write, |
995 | .pg_test = filelayout_pg_test, | 941 | .pg_test = filelayout_pg_test, |
996 | .pg_doio = pnfs_generic_pg_writepages, | 942 | .pg_doio = pnfs_generic_pg_writepages, |
943 | .pg_cleanup = pnfs_generic_pg_cleanup, | ||
997 | }; | 944 | }; |
998 | 945 | ||
999 | static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) | 946 | static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) |
@@ -1004,37 +951,11 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) | |||
1004 | return j; | 951 | return j; |
1005 | } | 952 | } |
1006 | 953 | ||
1007 | /* The generic layer is about to remove the req from the commit list. | ||
1008 | * If this will make the bucket empty, it will need to put the lseg reference. | ||
1009 | * Note this is must be called holding the inode (/cinfo) lock | ||
1010 | */ | ||
1011 | static void | ||
1012 | filelayout_clear_request_commit(struct nfs_page *req, | ||
1013 | struct nfs_commit_info *cinfo) | ||
1014 | { | ||
1015 | struct pnfs_layout_segment *freeme = NULL; | ||
1016 | |||
1017 | if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) | ||
1018 | goto out; | ||
1019 | cinfo->ds->nwritten--; | ||
1020 | if (list_is_singular(&req->wb_list)) { | ||
1021 | struct pnfs_commit_bucket *bucket; | ||
1022 | |||
1023 | bucket = list_first_entry(&req->wb_list, | ||
1024 | struct pnfs_commit_bucket, | ||
1025 | written); | ||
1026 | freeme = bucket->wlseg; | ||
1027 | bucket->wlseg = NULL; | ||
1028 | } | ||
1029 | out: | ||
1030 | nfs_request_remove_commit_list(req, cinfo); | ||
1031 | pnfs_put_lseg_locked(freeme); | ||
1032 | } | ||
1033 | |||
1034 | static void | 954 | static void |
1035 | filelayout_mark_request_commit(struct nfs_page *req, | 955 | filelayout_mark_request_commit(struct nfs_page *req, |
1036 | struct pnfs_layout_segment *lseg, | 956 | struct pnfs_layout_segment *lseg, |
1037 | struct nfs_commit_info *cinfo) | 957 | struct nfs_commit_info *cinfo, |
958 | u32 ds_commit_idx) | ||
1038 | 959 | ||
1039 | { | 960 | { |
1040 | struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); | 961 | struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); |
@@ -1064,7 +985,7 @@ filelayout_mark_request_commit(struct nfs_page *req, | |||
1064 | * is normally transferred to the COMMIT call and released | 985 | * is normally transferred to the COMMIT call and released |
1065 | * there. It could also be released if the last req is pulled | 986 | * there. It could also be released if the last req is pulled |
1066 | * off due to a rewrite, in which case it will be done in | 987 | * off due to a rewrite, in which case it will be done in |
1067 | * filelayout_clear_request_commit | 988 | * pnfs_generic_clear_request_commit |
1068 | */ | 989 | */ |
1069 | buckets[i].wlseg = pnfs_get_lseg(lseg); | 990 | buckets[i].wlseg = pnfs_get_lseg(lseg); |
1070 | } | 991 | } |
@@ -1081,7 +1002,7 @@ mds_commit: | |||
1081 | spin_unlock(cinfo->lock); | 1002 | spin_unlock(cinfo->lock); |
1082 | if (!cinfo->dreq) { | 1003 | if (!cinfo->dreq) { |
1083 | inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | 1004 | inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); |
1084 | inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, | 1005 | inc_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host), |
1085 | BDI_RECLAIMABLE); | 1006 | BDI_RECLAIMABLE); |
1086 | __mark_inode_dirty(req->wb_context->dentry->d_inode, | 1007 | __mark_inode_dirty(req->wb_context->dentry->d_inode, |
1087 | I_DIRTY_DATASYNC); | 1008 | I_DIRTY_DATASYNC); |
@@ -1138,101 +1059,15 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) | |||
1138 | fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); | 1059 | fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); |
1139 | if (fh) | 1060 | if (fh) |
1140 | data->args.fh = fh; | 1061 | data->args.fh = fh; |
1141 | return nfs_initiate_commit(ds_clnt, data, | 1062 | return nfs_initiate_commit(ds_clnt, data, NFS_PROTO(data->inode), |
1142 | &filelayout_commit_call_ops, how, | 1063 | &filelayout_commit_call_ops, how, |
1143 | RPC_TASK_SOFTCONN); | 1064 | RPC_TASK_SOFTCONN); |
1144 | out_err: | 1065 | out_err: |
1145 | prepare_to_resend_writes(data); | 1066 | pnfs_generic_prepare_to_resend_writes(data); |
1146 | filelayout_commit_release(data); | 1067 | pnfs_generic_commit_release(data); |
1147 | return -EAGAIN; | 1068 | return -EAGAIN; |
1148 | } | 1069 | } |
1149 | 1070 | ||
1150 | static int | ||
1151 | transfer_commit_list(struct list_head *src, struct list_head *dst, | ||
1152 | struct nfs_commit_info *cinfo, int max) | ||
1153 | { | ||
1154 | struct nfs_page *req, *tmp; | ||
1155 | int ret = 0; | ||
1156 | |||
1157 | list_for_each_entry_safe(req, tmp, src, wb_list) { | ||
1158 | if (!nfs_lock_request(req)) | ||
1159 | continue; | ||
1160 | kref_get(&req->wb_kref); | ||
1161 | if (cond_resched_lock(cinfo->lock)) | ||
1162 | list_safe_reset_next(req, tmp, wb_list); | ||
1163 | nfs_request_remove_commit_list(req, cinfo); | ||
1164 | clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); | ||
1165 | nfs_list_add_request(req, dst); | ||
1166 | ret++; | ||
1167 | if ((ret == max) && !cinfo->dreq) | ||
1168 | break; | ||
1169 | } | ||
1170 | return ret; | ||
1171 | } | ||
1172 | |||
1173 | /* Note called with cinfo->lock held. */ | ||
1174 | static int | ||
1175 | filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, | ||
1176 | struct nfs_commit_info *cinfo, | ||
1177 | int max) | ||
1178 | { | ||
1179 | struct list_head *src = &bucket->written; | ||
1180 | struct list_head *dst = &bucket->committing; | ||
1181 | int ret; | ||
1182 | |||
1183 | ret = transfer_commit_list(src, dst, cinfo, max); | ||
1184 | if (ret) { | ||
1185 | cinfo->ds->nwritten -= ret; | ||
1186 | cinfo->ds->ncommitting += ret; | ||
1187 | bucket->clseg = bucket->wlseg; | ||
1188 | if (list_empty(src)) | ||
1189 | bucket->wlseg = NULL; | ||
1190 | else | ||
1191 | pnfs_get_lseg(bucket->clseg); | ||
1192 | } | ||
1193 | return ret; | ||
1194 | } | ||
1195 | |||
1196 | /* Move reqs from written to committing lists, returning count of number moved. | ||
1197 | * Note called with cinfo->lock held. | ||
1198 | */ | ||
1199 | static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo, | ||
1200 | int max) | ||
1201 | { | ||
1202 | int i, rv = 0, cnt; | ||
1203 | |||
1204 | for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { | ||
1205 | cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i], | ||
1206 | cinfo, max); | ||
1207 | max -= cnt; | ||
1208 | rv += cnt; | ||
1209 | } | ||
1210 | return rv; | ||
1211 | } | ||
1212 | |||
1213 | /* Pull everything off the committing lists and dump into @dst */ | ||
1214 | static void filelayout_recover_commit_reqs(struct list_head *dst, | ||
1215 | struct nfs_commit_info *cinfo) | ||
1216 | { | ||
1217 | struct pnfs_commit_bucket *b; | ||
1218 | struct pnfs_layout_segment *freeme; | ||
1219 | int i; | ||
1220 | |||
1221 | restart: | ||
1222 | spin_lock(cinfo->lock); | ||
1223 | for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { | ||
1224 | if (transfer_commit_list(&b->written, dst, cinfo, 0)) { | ||
1225 | freeme = b->wlseg; | ||
1226 | b->wlseg = NULL; | ||
1227 | spin_unlock(cinfo->lock); | ||
1228 | pnfs_put_lseg(freeme); | ||
1229 | goto restart; | ||
1230 | } | ||
1231 | } | ||
1232 | cinfo->ds->nwritten = 0; | ||
1233 | spin_unlock(cinfo->lock); | ||
1234 | } | ||
1235 | |||
1236 | /* filelayout_search_commit_reqs - Search lists in @cinfo for the head reqest | 1071 | /* filelayout_search_commit_reqs - Search lists in @cinfo for the head reqest |
1237 | * for @page | 1072 | * for @page |
1238 | * @cinfo - commit info for current inode | 1073 | * @cinfo - commit info for current inode |
@@ -1263,108 +1098,14 @@ filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page) | |||
1263 | return NULL; | 1098 | return NULL; |
1264 | } | 1099 | } |
1265 | 1100 | ||
1266 | static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx) | ||
1267 | { | ||
1268 | struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; | ||
1269 | struct pnfs_commit_bucket *bucket; | ||
1270 | struct pnfs_layout_segment *freeme; | ||
1271 | int i; | ||
1272 | |||
1273 | for (i = idx; i < fl_cinfo->nbuckets; i++) { | ||
1274 | bucket = &fl_cinfo->buckets[i]; | ||
1275 | if (list_empty(&bucket->committing)) | ||
1276 | continue; | ||
1277 | nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); | ||
1278 | spin_lock(cinfo->lock); | ||
1279 | freeme = bucket->clseg; | ||
1280 | bucket->clseg = NULL; | ||
1281 | spin_unlock(cinfo->lock); | ||
1282 | pnfs_put_lseg(freeme); | ||
1283 | } | ||
1284 | } | ||
1285 | |||
1286 | static unsigned int | ||
1287 | alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) | ||
1288 | { | ||
1289 | struct pnfs_ds_commit_info *fl_cinfo; | ||
1290 | struct pnfs_commit_bucket *bucket; | ||
1291 | struct nfs_commit_data *data; | ||
1292 | int i; | ||
1293 | unsigned int nreq = 0; | ||
1294 | |||
1295 | fl_cinfo = cinfo->ds; | ||
1296 | bucket = fl_cinfo->buckets; | ||
1297 | for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { | ||
1298 | if (list_empty(&bucket->committing)) | ||
1299 | continue; | ||
1300 | data = nfs_commitdata_alloc(); | ||
1301 | if (!data) | ||
1302 | break; | ||
1303 | data->ds_commit_index = i; | ||
1304 | spin_lock(cinfo->lock); | ||
1305 | data->lseg = bucket->clseg; | ||
1306 | bucket->clseg = NULL; | ||
1307 | spin_unlock(cinfo->lock); | ||
1308 | list_add(&data->pages, list); | ||
1309 | nreq++; | ||
1310 | } | ||
1311 | |||
1312 | /* Clean up on error */ | ||
1313 | filelayout_retry_commit(cinfo, i); | ||
1314 | /* Caller will clean up entries put on list */ | ||
1315 | return nreq; | ||
1316 | } | ||
1317 | |||
1318 | /* This follows nfs_commit_list pretty closely */ | ||
1319 | static int | 1101 | static int |
1320 | filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | 1102 | filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, |
1321 | int how, struct nfs_commit_info *cinfo) | 1103 | int how, struct nfs_commit_info *cinfo) |
1322 | { | 1104 | { |
1323 | struct nfs_commit_data *data, *tmp; | 1105 | return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo, |
1324 | LIST_HEAD(list); | 1106 | filelayout_initiate_commit); |
1325 | unsigned int nreq = 0; | ||
1326 | |||
1327 | if (!list_empty(mds_pages)) { | ||
1328 | data = nfs_commitdata_alloc(); | ||
1329 | if (data != NULL) { | ||
1330 | data->lseg = NULL; | ||
1331 | list_add(&data->pages, &list); | ||
1332 | nreq++; | ||
1333 | } else { | ||
1334 | nfs_retry_commit(mds_pages, NULL, cinfo); | ||
1335 | filelayout_retry_commit(cinfo, 0); | ||
1336 | cinfo->completion_ops->error_cleanup(NFS_I(inode)); | ||
1337 | return -ENOMEM; | ||
1338 | } | ||
1339 | } | ||
1340 | |||
1341 | nreq += alloc_ds_commits(cinfo, &list); | ||
1342 | |||
1343 | if (nreq == 0) { | ||
1344 | cinfo->completion_ops->error_cleanup(NFS_I(inode)); | ||
1345 | goto out; | ||
1346 | } | ||
1347 | |||
1348 | atomic_add(nreq, &cinfo->mds->rpcs_out); | ||
1349 | |||
1350 | list_for_each_entry_safe(data, tmp, &list, pages) { | ||
1351 | list_del_init(&data->pages); | ||
1352 | if (!data->lseg) { | ||
1353 | nfs_init_commit(data, mds_pages, NULL, cinfo); | ||
1354 | nfs_initiate_commit(NFS_CLIENT(inode), data, | ||
1355 | data->mds_ops, how, 0); | ||
1356 | } else { | ||
1357 | struct pnfs_commit_bucket *buckets; | ||
1358 | |||
1359 | buckets = cinfo->ds->buckets; | ||
1360 | nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo); | ||
1361 | filelayout_initiate_commit(data, how); | ||
1362 | } | ||
1363 | } | ||
1364 | out: | ||
1365 | cinfo->ds->ncommitting = 0; | ||
1366 | return PNFS_ATTEMPTED; | ||
1367 | } | 1107 | } |
1108 | |||
1368 | static struct nfs4_deviceid_node * | 1109 | static struct nfs4_deviceid_node * |
1369 | filelayout_alloc_deviceid_node(struct nfs_server *server, | 1110 | filelayout_alloc_deviceid_node(struct nfs_server *server, |
1370 | struct pnfs_device *pdev, gfp_t gfp_flags) | 1111 | struct pnfs_device *pdev, gfp_t gfp_flags) |
@@ -1421,9 +1162,9 @@ static struct pnfs_layoutdriver_type filelayout_type = { | |||
1421 | .pg_write_ops = &filelayout_pg_write_ops, | 1162 | .pg_write_ops = &filelayout_pg_write_ops, |
1422 | .get_ds_info = &filelayout_get_ds_info, | 1163 | .get_ds_info = &filelayout_get_ds_info, |
1423 | .mark_request_commit = filelayout_mark_request_commit, | 1164 | .mark_request_commit = filelayout_mark_request_commit, |
1424 | .clear_request_commit = filelayout_clear_request_commit, | 1165 | .clear_request_commit = pnfs_generic_clear_request_commit, |
1425 | .scan_commit_lists = filelayout_scan_commit_lists, | 1166 | .scan_commit_lists = pnfs_generic_scan_commit_lists, |
1426 | .recover_commit_reqs = filelayout_recover_commit_reqs, | 1167 | .recover_commit_reqs = pnfs_generic_recover_commit_reqs, |
1427 | .search_commit_reqs = filelayout_search_commit_reqs, | 1168 | .search_commit_reqs = filelayout_search_commit_reqs, |
1428 | .commit_pagelist = filelayout_commit_pagelist, | 1169 | .commit_pagelist = filelayout_commit_pagelist, |
1429 | .read_pagelist = filelayout_read_pagelist, | 1170 | .read_pagelist = filelayout_read_pagelist, |
diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index 7c9f800c49d7..2896cb833a11 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h | |||
@@ -33,13 +33,6 @@ | |||
33 | #include "../pnfs.h" | 33 | #include "../pnfs.h" |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * Default data server connection timeout and retrans vaules. | ||
37 | * Set by module paramters dataserver_timeo and dataserver_retrans. | ||
38 | */ | ||
39 | #define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */ | ||
40 | #define NFS4_DEF_DS_RETRANS 5 | ||
41 | |||
42 | /* | ||
43 | * Field testing shows we need to support up to 4096 stripe indices. | 36 | * Field testing shows we need to support up to 4096 stripe indices. |
44 | * We store each index as a u8 (u32 on the wire) to keep the memory footprint | 37 | * We store each index as a u8 (u32 on the wire) to keep the memory footprint |
45 | * reasonable. This in turn means we support a maximum of 256 | 38 | * reasonable. This in turn means we support a maximum of 256 |
@@ -48,32 +41,11 @@ | |||
48 | #define NFS4_PNFS_MAX_STRIPE_CNT 4096 | 41 | #define NFS4_PNFS_MAX_STRIPE_CNT 4096 |
49 | #define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ | 42 | #define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ |
50 | 43 | ||
51 | /* error codes for internal use */ | ||
52 | #define NFS4ERR_RESET_TO_MDS 12001 | ||
53 | |||
54 | enum stripetype4 { | 44 | enum stripetype4 { |
55 | STRIPE_SPARSE = 1, | 45 | STRIPE_SPARSE = 1, |
56 | STRIPE_DENSE = 2 | 46 | STRIPE_DENSE = 2 |
57 | }; | 47 | }; |
58 | 48 | ||
59 | /* Individual ip address */ | ||
60 | struct nfs4_pnfs_ds_addr { | ||
61 | struct sockaddr_storage da_addr; | ||
62 | size_t da_addrlen; | ||
63 | struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ | ||
64 | char *da_remotestr; /* human readable addr+port */ | ||
65 | }; | ||
66 | |||
67 | struct nfs4_pnfs_ds { | ||
68 | struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ | ||
69 | char *ds_remotestr; /* comma sep list of addrs */ | ||
70 | struct list_head ds_addrs; | ||
71 | struct nfs_client *ds_clp; | ||
72 | atomic_t ds_count; | ||
73 | unsigned long ds_state; | ||
74 | #define NFS4DS_CONNECTING 0 /* ds is establishing connection */ | ||
75 | }; | ||
76 | |||
77 | struct nfs4_file_layout_dsaddr { | 49 | struct nfs4_file_layout_dsaddr { |
78 | struct nfs4_deviceid_node id_node; | 50 | struct nfs4_deviceid_node id_node; |
79 | u32 stripe_count; | 51 | u32 stripe_count; |
@@ -119,17 +91,6 @@ FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg) | |||
119 | return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; | 91 | return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; |
120 | } | 92 | } |
121 | 93 | ||
122 | static inline void | ||
123 | filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node) | ||
124 | { | ||
125 | u32 *p = (u32 *)&node->deviceid; | ||
126 | |||
127 | printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n", | ||
128 | p[0], p[1], p[2], p[3]); | ||
129 | |||
130 | set_bit(NFS_DEVICEID_INVALID, &node->flags); | ||
131 | } | ||
132 | |||
133 | static inline bool | 94 | static inline bool |
134 | filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) | 95 | filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) |
135 | { | 96 | { |
@@ -142,7 +103,6 @@ filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node); | |||
142 | extern struct nfs_fh * | 103 | extern struct nfs_fh * |
143 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); | 104 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); |
144 | 105 | ||
145 | extern void print_ds(struct nfs4_pnfs_ds *ds); | ||
146 | u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); | 106 | u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); |
147 | u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); | 107 | u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); |
148 | struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, | 108 | struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, |
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index bfecac781f19..4f372e224603 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c | |||
@@ -31,7 +31,6 @@ | |||
31 | #include <linux/nfs_fs.h> | 31 | #include <linux/nfs_fs.h> |
32 | #include <linux/vmalloc.h> | 32 | #include <linux/vmalloc.h> |
33 | #include <linux/module.h> | 33 | #include <linux/module.h> |
34 | #include <linux/sunrpc/addr.h> | ||
35 | 34 | ||
36 | #include "../internal.h" | 35 | #include "../internal.h" |
37 | #include "../nfs4session.h" | 36 | #include "../nfs4session.h" |
@@ -42,183 +41,6 @@ | |||
42 | static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; | 41 | static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; |
43 | static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; | 42 | static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; |
44 | 43 | ||
45 | /* | ||
46 | * Data server cache | ||
47 | * | ||
48 | * Data servers can be mapped to different device ids. | ||
49 | * nfs4_pnfs_ds reference counting | ||
50 | * - set to 1 on allocation | ||
51 | * - incremented when a device id maps a data server already in the cache. | ||
52 | * - decremented when deviceid is removed from the cache. | ||
53 | */ | ||
54 | static DEFINE_SPINLOCK(nfs4_ds_cache_lock); | ||
55 | static LIST_HEAD(nfs4_data_server_cache); | ||
56 | |||
57 | /* Debug routines */ | ||
58 | void | ||
59 | print_ds(struct nfs4_pnfs_ds *ds) | ||
60 | { | ||
61 | if (ds == NULL) { | ||
62 | printk("%s NULL device\n", __func__); | ||
63 | return; | ||
64 | } | ||
65 | printk(" ds %s\n" | ||
66 | " ref count %d\n" | ||
67 | " client %p\n" | ||
68 | " cl_exchange_flags %x\n", | ||
69 | ds->ds_remotestr, | ||
70 | atomic_read(&ds->ds_count), ds->ds_clp, | ||
71 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); | ||
72 | } | ||
73 | |||
74 | static bool | ||
75 | same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) | ||
76 | { | ||
77 | struct sockaddr_in *a, *b; | ||
78 | struct sockaddr_in6 *a6, *b6; | ||
79 | |||
80 | if (addr1->sa_family != addr2->sa_family) | ||
81 | return false; | ||
82 | |||
83 | switch (addr1->sa_family) { | ||
84 | case AF_INET: | ||
85 | a = (struct sockaddr_in *)addr1; | ||
86 | b = (struct sockaddr_in *)addr2; | ||
87 | |||
88 | if (a->sin_addr.s_addr == b->sin_addr.s_addr && | ||
89 | a->sin_port == b->sin_port) | ||
90 | return true; | ||
91 | break; | ||
92 | |||
93 | case AF_INET6: | ||
94 | a6 = (struct sockaddr_in6 *)addr1; | ||
95 | b6 = (struct sockaddr_in6 *)addr2; | ||
96 | |||
97 | /* LINKLOCAL addresses must have matching scope_id */ | ||
98 | if (ipv6_addr_src_scope(&a6->sin6_addr) == | ||
99 | IPV6_ADDR_SCOPE_LINKLOCAL && | ||
100 | a6->sin6_scope_id != b6->sin6_scope_id) | ||
101 | return false; | ||
102 | |||
103 | if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && | ||
104 | a6->sin6_port == b6->sin6_port) | ||
105 | return true; | ||
106 | break; | ||
107 | |||
108 | default: | ||
109 | dprintk("%s: unhandled address family: %u\n", | ||
110 | __func__, addr1->sa_family); | ||
111 | return false; | ||
112 | } | ||
113 | |||
114 | return false; | ||
115 | } | ||
116 | |||
117 | static bool | ||
118 | _same_data_server_addrs_locked(const struct list_head *dsaddrs1, | ||
119 | const struct list_head *dsaddrs2) | ||
120 | { | ||
121 | struct nfs4_pnfs_ds_addr *da1, *da2; | ||
122 | |||
123 | /* step through both lists, comparing as we go */ | ||
124 | for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), | ||
125 | da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); | ||
126 | da1 != NULL && da2 != NULL; | ||
127 | da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), | ||
128 | da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { | ||
129 | if (!same_sockaddr((struct sockaddr *)&da1->da_addr, | ||
130 | (struct sockaddr *)&da2->da_addr)) | ||
131 | return false; | ||
132 | } | ||
133 | if (da1 == NULL && da2 == NULL) | ||
134 | return true; | ||
135 | |||
136 | return false; | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Lookup DS by addresses. nfs4_ds_cache_lock is held | ||
141 | */ | ||
142 | static struct nfs4_pnfs_ds * | ||
143 | _data_server_lookup_locked(const struct list_head *dsaddrs) | ||
144 | { | ||
145 | struct nfs4_pnfs_ds *ds; | ||
146 | |||
147 | list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) | ||
148 | if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) | ||
149 | return ds; | ||
150 | return NULL; | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * Create an rpc connection to the nfs4_pnfs_ds data server | ||
155 | * Currently only supports IPv4 and IPv6 addresses | ||
156 | */ | ||
157 | static int | ||
158 | nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) | ||
159 | { | ||
160 | struct nfs_client *clp = ERR_PTR(-EIO); | ||
161 | struct nfs4_pnfs_ds_addr *da; | ||
162 | int status = 0; | ||
163 | |||
164 | dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, | ||
165 | mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); | ||
166 | |||
167 | list_for_each_entry(da, &ds->ds_addrs, da_node) { | ||
168 | dprintk("%s: DS %s: trying address %s\n", | ||
169 | __func__, ds->ds_remotestr, da->da_remotestr); | ||
170 | |||
171 | clp = nfs4_set_ds_client(mds_srv->nfs_client, | ||
172 | (struct sockaddr *)&da->da_addr, | ||
173 | da->da_addrlen, IPPROTO_TCP, | ||
174 | dataserver_timeo, dataserver_retrans); | ||
175 | if (!IS_ERR(clp)) | ||
176 | break; | ||
177 | } | ||
178 | |||
179 | if (IS_ERR(clp)) { | ||
180 | status = PTR_ERR(clp); | ||
181 | goto out; | ||
182 | } | ||
183 | |||
184 | status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); | ||
185 | if (status) | ||
186 | goto out_put; | ||
187 | |||
188 | smp_wmb(); | ||
189 | ds->ds_clp = clp; | ||
190 | dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); | ||
191 | out: | ||
192 | return status; | ||
193 | out_put: | ||
194 | nfs_put_client(clp); | ||
195 | goto out; | ||
196 | } | ||
197 | |||
198 | static void | ||
199 | destroy_ds(struct nfs4_pnfs_ds *ds) | ||
200 | { | ||
201 | struct nfs4_pnfs_ds_addr *da; | ||
202 | |||
203 | dprintk("--> %s\n", __func__); | ||
204 | ifdebug(FACILITY) | ||
205 | print_ds(ds); | ||
206 | |||
207 | nfs_put_client(ds->ds_clp); | ||
208 | |||
209 | while (!list_empty(&ds->ds_addrs)) { | ||
210 | da = list_first_entry(&ds->ds_addrs, | ||
211 | struct nfs4_pnfs_ds_addr, | ||
212 | da_node); | ||
213 | list_del_init(&da->da_node); | ||
214 | kfree(da->da_remotestr); | ||
215 | kfree(da); | ||
216 | } | ||
217 | |||
218 | kfree(ds->ds_remotestr); | ||
219 | kfree(ds); | ||
220 | } | ||
221 | |||
222 | void | 44 | void |
223 | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | 45 | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) |
224 | { | 46 | { |
@@ -229,259 +51,13 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | |||
229 | 51 | ||
230 | for (i = 0; i < dsaddr->ds_num; i++) { | 52 | for (i = 0; i < dsaddr->ds_num; i++) { |
231 | ds = dsaddr->ds_list[i]; | 53 | ds = dsaddr->ds_list[i]; |
232 | if (ds != NULL) { | 54 | if (ds != NULL) |
233 | if (atomic_dec_and_lock(&ds->ds_count, | 55 | nfs4_pnfs_ds_put(ds); |
234 | &nfs4_ds_cache_lock)) { | ||
235 | list_del_init(&ds->ds_node); | ||
236 | spin_unlock(&nfs4_ds_cache_lock); | ||
237 | destroy_ds(ds); | ||
238 | } | ||
239 | } | ||
240 | } | 56 | } |
241 | kfree(dsaddr->stripe_indices); | 57 | kfree(dsaddr->stripe_indices); |
242 | kfree(dsaddr); | 58 | kfree(dsaddr); |
243 | } | 59 | } |
244 | 60 | ||
245 | /* | ||
246 | * Create a string with a human readable address and port to avoid | ||
247 | * complicated setup around many dprinks. | ||
248 | */ | ||
249 | static char * | ||
250 | nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) | ||
251 | { | ||
252 | struct nfs4_pnfs_ds_addr *da; | ||
253 | char *remotestr; | ||
254 | size_t len; | ||
255 | char *p; | ||
256 | |||
257 | len = 3; /* '{', '}' and eol */ | ||
258 | list_for_each_entry(da, dsaddrs, da_node) { | ||
259 | len += strlen(da->da_remotestr) + 1; /* string plus comma */ | ||
260 | } | ||
261 | |||
262 | remotestr = kzalloc(len, gfp_flags); | ||
263 | if (!remotestr) | ||
264 | return NULL; | ||
265 | |||
266 | p = remotestr; | ||
267 | *(p++) = '{'; | ||
268 | len--; | ||
269 | list_for_each_entry(da, dsaddrs, da_node) { | ||
270 | size_t ll = strlen(da->da_remotestr); | ||
271 | |||
272 | if (ll > len) | ||
273 | goto out_err; | ||
274 | |||
275 | memcpy(p, da->da_remotestr, ll); | ||
276 | p += ll; | ||
277 | len -= ll; | ||
278 | |||
279 | if (len < 1) | ||
280 | goto out_err; | ||
281 | (*p++) = ','; | ||
282 | len--; | ||
283 | } | ||
284 | if (len < 2) | ||
285 | goto out_err; | ||
286 | *(p++) = '}'; | ||
287 | *p = '\0'; | ||
288 | return remotestr; | ||
289 | out_err: | ||
290 | kfree(remotestr); | ||
291 | return NULL; | ||
292 | } | ||
293 | |||
294 | static struct nfs4_pnfs_ds * | ||
295 | nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) | ||
296 | { | ||
297 | struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; | ||
298 | char *remotestr; | ||
299 | |||
300 | if (list_empty(dsaddrs)) { | ||
301 | dprintk("%s: no addresses defined\n", __func__); | ||
302 | goto out; | ||
303 | } | ||
304 | |||
305 | ds = kzalloc(sizeof(*ds), gfp_flags); | ||
306 | if (!ds) | ||
307 | goto out; | ||
308 | |||
309 | /* this is only used for debugging, so it's ok if its NULL */ | ||
310 | remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); | ||
311 | |||
312 | spin_lock(&nfs4_ds_cache_lock); | ||
313 | tmp_ds = _data_server_lookup_locked(dsaddrs); | ||
314 | if (tmp_ds == NULL) { | ||
315 | INIT_LIST_HEAD(&ds->ds_addrs); | ||
316 | list_splice_init(dsaddrs, &ds->ds_addrs); | ||
317 | ds->ds_remotestr = remotestr; | ||
318 | atomic_set(&ds->ds_count, 1); | ||
319 | INIT_LIST_HEAD(&ds->ds_node); | ||
320 | ds->ds_clp = NULL; | ||
321 | list_add(&ds->ds_node, &nfs4_data_server_cache); | ||
322 | dprintk("%s add new data server %s\n", __func__, | ||
323 | ds->ds_remotestr); | ||
324 | } else { | ||
325 | kfree(remotestr); | ||
326 | kfree(ds); | ||
327 | atomic_inc(&tmp_ds->ds_count); | ||
328 | dprintk("%s data server %s found, inc'ed ds_count to %d\n", | ||
329 | __func__, tmp_ds->ds_remotestr, | ||
330 | atomic_read(&tmp_ds->ds_count)); | ||
331 | ds = tmp_ds; | ||
332 | } | ||
333 | spin_unlock(&nfs4_ds_cache_lock); | ||
334 | out: | ||
335 | return ds; | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * Currently only supports ipv4, ipv6 and one multi-path address. | ||
340 | */ | ||
341 | static struct nfs4_pnfs_ds_addr * | ||
342 | decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags) | ||
343 | { | ||
344 | struct nfs4_pnfs_ds_addr *da = NULL; | ||
345 | char *buf, *portstr; | ||
346 | __be16 port; | ||
347 | int nlen, rlen; | ||
348 | int tmp[2]; | ||
349 | __be32 *p; | ||
350 | char *netid, *match_netid; | ||
351 | size_t len, match_netid_len; | ||
352 | char *startsep = ""; | ||
353 | char *endsep = ""; | ||
354 | |||
355 | |||
356 | /* r_netid */ | ||
357 | p = xdr_inline_decode(streamp, 4); | ||
358 | if (unlikely(!p)) | ||
359 | goto out_err; | ||
360 | nlen = be32_to_cpup(p++); | ||
361 | |||
362 | p = xdr_inline_decode(streamp, nlen); | ||
363 | if (unlikely(!p)) | ||
364 | goto out_err; | ||
365 | |||
366 | netid = kmalloc(nlen+1, gfp_flags); | ||
367 | if (unlikely(!netid)) | ||
368 | goto out_err; | ||
369 | |||
370 | netid[nlen] = '\0'; | ||
371 | memcpy(netid, p, nlen); | ||
372 | |||
373 | /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ | ||
374 | p = xdr_inline_decode(streamp, 4); | ||
375 | if (unlikely(!p)) | ||
376 | goto out_free_netid; | ||
377 | rlen = be32_to_cpup(p); | ||
378 | |||
379 | p = xdr_inline_decode(streamp, rlen); | ||
380 | if (unlikely(!p)) | ||
381 | goto out_free_netid; | ||
382 | |||
383 | /* port is ".ABC.DEF", 8 chars max */ | ||
384 | if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { | ||
385 | dprintk("%s: Invalid address, length %d\n", __func__, | ||
386 | rlen); | ||
387 | goto out_free_netid; | ||
388 | } | ||
389 | buf = kmalloc(rlen + 1, gfp_flags); | ||
390 | if (!buf) { | ||
391 | dprintk("%s: Not enough memory\n", __func__); | ||
392 | goto out_free_netid; | ||
393 | } | ||
394 | buf[rlen] = '\0'; | ||
395 | memcpy(buf, p, rlen); | ||
396 | |||
397 | /* replace port '.' with '-' */ | ||
398 | portstr = strrchr(buf, '.'); | ||
399 | if (!portstr) { | ||
400 | dprintk("%s: Failed finding expected dot in port\n", | ||
401 | __func__); | ||
402 | goto out_free_buf; | ||
403 | } | ||
404 | *portstr = '-'; | ||
405 | |||
406 | /* find '.' between address and port */ | ||
407 | portstr = strrchr(buf, '.'); | ||
408 | if (!portstr) { | ||
409 | dprintk("%s: Failed finding expected dot between address and " | ||
410 | "port\n", __func__); | ||
411 | goto out_free_buf; | ||
412 | } | ||
413 | *portstr = '\0'; | ||
414 | |||
415 | da = kzalloc(sizeof(*da), gfp_flags); | ||
416 | if (unlikely(!da)) | ||
417 | goto out_free_buf; | ||
418 | |||
419 | INIT_LIST_HEAD(&da->da_node); | ||
420 | |||
421 | if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, | ||
422 | sizeof(da->da_addr))) { | ||
423 | dprintk("%s: error parsing address %s\n", __func__, buf); | ||
424 | goto out_free_da; | ||
425 | } | ||
426 | |||
427 | portstr++; | ||
428 | sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); | ||
429 | port = htons((tmp[0] << 8) | (tmp[1])); | ||
430 | |||
431 | switch (da->da_addr.ss_family) { | ||
432 | case AF_INET: | ||
433 | ((struct sockaddr_in *)&da->da_addr)->sin_port = port; | ||
434 | da->da_addrlen = sizeof(struct sockaddr_in); | ||
435 | match_netid = "tcp"; | ||
436 | match_netid_len = 3; | ||
437 | break; | ||
438 | |||
439 | case AF_INET6: | ||
440 | ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; | ||
441 | da->da_addrlen = sizeof(struct sockaddr_in6); | ||
442 | match_netid = "tcp6"; | ||
443 | match_netid_len = 4; | ||
444 | startsep = "["; | ||
445 | endsep = "]"; | ||
446 | break; | ||
447 | |||
448 | default: | ||
449 | dprintk("%s: unsupported address family: %u\n", | ||
450 | __func__, da->da_addr.ss_family); | ||
451 | goto out_free_da; | ||
452 | } | ||
453 | |||
454 | if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { | ||
455 | dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", | ||
456 | __func__, netid, match_netid); | ||
457 | goto out_free_da; | ||
458 | } | ||
459 | |||
460 | /* save human readable address */ | ||
461 | len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; | ||
462 | da->da_remotestr = kzalloc(len, gfp_flags); | ||
463 | |||
464 | /* NULL is ok, only used for dprintk */ | ||
465 | if (da->da_remotestr) | ||
466 | snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, | ||
467 | buf, endsep, ntohs(port)); | ||
468 | |||
469 | dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); | ||
470 | kfree(buf); | ||
471 | kfree(netid); | ||
472 | return da; | ||
473 | |||
474 | out_free_da: | ||
475 | kfree(da); | ||
476 | out_free_buf: | ||
477 | dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); | ||
478 | kfree(buf); | ||
479 | out_free_netid: | ||
480 | kfree(netid); | ||
481 | out_err: | ||
482 | return NULL; | ||
483 | } | ||
484 | |||
485 | /* Decode opaque device data and return the result */ | 61 | /* Decode opaque device data and return the result */ |
486 | struct nfs4_file_layout_dsaddr * | 62 | struct nfs4_file_layout_dsaddr * |
487 | nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | 63 | nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, |
@@ -584,8 +160,8 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | |||
584 | 160 | ||
585 | mp_count = be32_to_cpup(p); /* multipath count */ | 161 | mp_count = be32_to_cpup(p); /* multipath count */ |
586 | for (j = 0; j < mp_count; j++) { | 162 | for (j = 0; j < mp_count; j++) { |
587 | da = decode_ds_addr(server->nfs_client->cl_net, | 163 | da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, |
588 | &stream, gfp_flags); | 164 | &stream, gfp_flags); |
589 | if (da) | 165 | if (da) |
590 | list_add_tail(&da->da_node, &dsaddrs); | 166 | list_add_tail(&da->da_node, &dsaddrs); |
591 | } | 167 | } |
@@ -681,22 +257,7 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) | |||
681 | return flseg->fh_array[i]; | 257 | return flseg->fh_array[i]; |
682 | } | 258 | } |
683 | 259 | ||
684 | static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) | 260 | /* Upon return, either ds is connected, or ds is NULL */ |
685 | { | ||
686 | might_sleep(); | ||
687 | wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING, | ||
688 | nfs_wait_bit_killable, TASK_KILLABLE); | ||
689 | } | ||
690 | |||
691 | static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) | ||
692 | { | ||
693 | smp_mb__before_atomic(); | ||
694 | clear_bit(NFS4DS_CONNECTING, &ds->ds_state); | ||
695 | smp_mb__after_atomic(); | ||
696 | wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); | ||
697 | } | ||
698 | |||
699 | |||
700 | struct nfs4_pnfs_ds * | 261 | struct nfs4_pnfs_ds * |
701 | nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) | 262 | nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) |
702 | { | 263 | { |
@@ -704,29 +265,23 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) | |||
704 | struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; | 265 | struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; |
705 | struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); | 266 | struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); |
706 | struct nfs4_pnfs_ds *ret = ds; | 267 | struct nfs4_pnfs_ds *ret = ds; |
268 | struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); | ||
707 | 269 | ||
708 | if (ds == NULL) { | 270 | if (ds == NULL) { |
709 | printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", | 271 | printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", |
710 | __func__, ds_idx); | 272 | __func__, ds_idx); |
711 | filelayout_mark_devid_invalid(devid); | 273 | pnfs_generic_mark_devid_invalid(devid); |
712 | goto out; | 274 | goto out; |
713 | } | 275 | } |
714 | smp_rmb(); | 276 | smp_rmb(); |
715 | if (ds->ds_clp) | 277 | if (ds->ds_clp) |
716 | goto out_test_devid; | 278 | goto out_test_devid; |
717 | 279 | ||
718 | if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { | 280 | nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, |
719 | struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); | 281 | dataserver_retrans, 4, |
720 | int err; | 282 | s->nfs_client->cl_minorversion, |
721 | 283 | s->nfs_client->cl_rpcclient->cl_auth->au_flavor); | |
722 | err = nfs4_ds_connect(s, ds); | 284 | |
723 | if (err) | ||
724 | nfs4_mark_deviceid_unavailable(devid); | ||
725 | nfs4_clear_ds_conn_bit(ds); | ||
726 | } else { | ||
727 | /* Either ds is connected, or ds is NULL */ | ||
728 | nfs4_wait_ds_connect(ds); | ||
729 | } | ||
730 | out_test_devid: | 285 | out_test_devid: |
731 | if (filelayout_test_devid_unavailable(devid)) | 286 | if (filelayout_test_devid_unavailable(devid)) |
732 | ret = NULL; | 287 | ret = NULL; |
diff --git a/fs/nfs/flexfilelayout/Makefile b/fs/nfs/flexfilelayout/Makefile new file mode 100644 index 000000000000..1d2c9f6bbcd4 --- /dev/null +++ b/fs/nfs/flexfilelayout/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | # | ||
2 | # Makefile for the pNFS Flexfile Layout Driver kernel module | ||
3 | # | ||
4 | obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += nfs_layout_flexfiles.o | ||
5 | nfs_layout_flexfiles-y := flexfilelayout.o flexfilelayoutdev.o | ||
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c new file mode 100644 index 000000000000..c22ecaa86c1c --- /dev/null +++ b/fs/nfs/flexfilelayout/flexfilelayout.c | |||
@@ -0,0 +1,1574 @@ | |||
1 | /* | ||
2 | * Module for pnfs flexfile layout driver. | ||
3 | * | ||
4 | * Copyright (c) 2014, Primary Data, Inc. All rights reserved. | ||
5 | * | ||
6 | * Tao Peng <bergwolf@primarydata.com> | ||
7 | */ | ||
8 | |||
9 | #include <linux/nfs_fs.h> | ||
10 | #include <linux/nfs_page.h> | ||
11 | #include <linux/module.h> | ||
12 | |||
13 | #include <linux/sunrpc/metrics.h> | ||
14 | #include <linux/nfs_idmap.h> | ||
15 | |||
16 | #include "flexfilelayout.h" | ||
17 | #include "../nfs4session.h" | ||
18 | #include "../internal.h" | ||
19 | #include "../delegation.h" | ||
20 | #include "../nfs4trace.h" | ||
21 | #include "../iostat.h" | ||
22 | #include "../nfs.h" | ||
23 | |||
24 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
25 | |||
26 | #define FF_LAYOUT_POLL_RETRY_MAX (15*HZ) | ||
27 | |||
28 | static struct pnfs_layout_hdr * | ||
29 | ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) | ||
30 | { | ||
31 | struct nfs4_flexfile_layout *ffl; | ||
32 | |||
33 | ffl = kzalloc(sizeof(*ffl), gfp_flags); | ||
34 | if (ffl) { | ||
35 | INIT_LIST_HEAD(&ffl->error_list); | ||
36 | return &ffl->generic_hdr; | ||
37 | } else | ||
38 | return NULL; | ||
39 | } | ||
40 | |||
41 | static void | ||
42 | ff_layout_free_layout_hdr(struct pnfs_layout_hdr *lo) | ||
43 | { | ||
44 | struct nfs4_ff_layout_ds_err *err, *n; | ||
45 | |||
46 | list_for_each_entry_safe(err, n, &FF_LAYOUT_FROM_HDR(lo)->error_list, | ||
47 | list) { | ||
48 | list_del(&err->list); | ||
49 | kfree(err); | ||
50 | } | ||
51 | kfree(FF_LAYOUT_FROM_HDR(lo)); | ||
52 | } | ||
53 | |||
54 | static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) | ||
55 | { | ||
56 | __be32 *p; | ||
57 | |||
58 | p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE); | ||
59 | if (unlikely(p == NULL)) | ||
60 | return -ENOBUFS; | ||
61 | memcpy(stateid, p, NFS4_STATEID_SIZE); | ||
62 | dprintk("%s: stateid id= [%x%x%x%x]\n", __func__, | ||
63 | p[0], p[1], p[2], p[3]); | ||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | static int decode_deviceid(struct xdr_stream *xdr, struct nfs4_deviceid *devid) | ||
68 | { | ||
69 | __be32 *p; | ||
70 | |||
71 | p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE); | ||
72 | if (unlikely(!p)) | ||
73 | return -ENOBUFS; | ||
74 | memcpy(devid, p, NFS4_DEVICEID4_SIZE); | ||
75 | nfs4_print_deviceid(devid); | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh) | ||
80 | { | ||
81 | __be32 *p; | ||
82 | |||
83 | p = xdr_inline_decode(xdr, 4); | ||
84 | if (unlikely(!p)) | ||
85 | return -ENOBUFS; | ||
86 | fh->size = be32_to_cpup(p++); | ||
87 | if (fh->size > sizeof(struct nfs_fh)) { | ||
88 | printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n", | ||
89 | fh->size); | ||
90 | return -EOVERFLOW; | ||
91 | } | ||
92 | /* fh.data */ | ||
93 | p = xdr_inline_decode(xdr, fh->size); | ||
94 | if (unlikely(!p)) | ||
95 | return -ENOBUFS; | ||
96 | memcpy(&fh->data, p, fh->size); | ||
97 | dprintk("%s: fh len %d\n", __func__, fh->size); | ||
98 | |||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | /* | ||
103 | * Currently only stringified uids and gids are accepted. | ||
104 | * I.e., kerberos is not supported to the DSes, so no pricipals. | ||
105 | * | ||
106 | * That means that one common function will suffice, but when | ||
107 | * principals are added, this should be split to accomodate | ||
108 | * calls to both nfs_map_name_to_uid() and nfs_map_group_to_gid(). | ||
109 | */ | ||
110 | static int | ||
111 | decode_name(struct xdr_stream *xdr, u32 *id) | ||
112 | { | ||
113 | __be32 *p; | ||
114 | int len; | ||
115 | |||
116 | /* opaque_length(4)*/ | ||
117 | p = xdr_inline_decode(xdr, 4); | ||
118 | if (unlikely(!p)) | ||
119 | return -ENOBUFS; | ||
120 | len = be32_to_cpup(p++); | ||
121 | if (len < 0) | ||
122 | return -EINVAL; | ||
123 | |||
124 | dprintk("%s: len %u\n", __func__, len); | ||
125 | |||
126 | /* opaque body */ | ||
127 | p = xdr_inline_decode(xdr, len); | ||
128 | if (unlikely(!p)) | ||
129 | return -ENOBUFS; | ||
130 | |||
131 | if (!nfs_map_string_to_numeric((char *)p, len, id)) | ||
132 | return -EINVAL; | ||
133 | |||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | static void ff_layout_free_mirror_array(struct nfs4_ff_layout_segment *fls) | ||
138 | { | ||
139 | int i; | ||
140 | |||
141 | if (fls->mirror_array) { | ||
142 | for (i = 0; i < fls->mirror_array_cnt; i++) { | ||
143 | /* normally mirror_ds is freed in | ||
144 | * .free_deviceid_node but we still do it here | ||
145 | * for .alloc_lseg error path */ | ||
146 | if (fls->mirror_array[i]) { | ||
147 | kfree(fls->mirror_array[i]->fh_versions); | ||
148 | nfs4_ff_layout_put_deviceid(fls->mirror_array[i]->mirror_ds); | ||
149 | kfree(fls->mirror_array[i]); | ||
150 | } | ||
151 | } | ||
152 | kfree(fls->mirror_array); | ||
153 | fls->mirror_array = NULL; | ||
154 | } | ||
155 | } | ||
156 | |||
157 | static int ff_layout_check_layout(struct nfs4_layoutget_res *lgr) | ||
158 | { | ||
159 | int ret = 0; | ||
160 | |||
161 | dprintk("--> %s\n", __func__); | ||
162 | |||
163 | /* FIXME: remove this check when layout segment support is added */ | ||
164 | if (lgr->range.offset != 0 || | ||
165 | lgr->range.length != NFS4_MAX_UINT64) { | ||
166 | dprintk("%s Only whole file layouts supported. Use MDS i/o\n", | ||
167 | __func__); | ||
168 | ret = -EINVAL; | ||
169 | } | ||
170 | |||
171 | dprintk("--> %s returns %d\n", __func__, ret); | ||
172 | return ret; | ||
173 | } | ||
174 | |||
175 | static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls) | ||
176 | { | ||
177 | if (fls) { | ||
178 | ff_layout_free_mirror_array(fls); | ||
179 | kfree(fls); | ||
180 | } | ||
181 | } | ||
182 | |||
183 | static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) | ||
184 | { | ||
185 | struct nfs4_ff_layout_mirror *tmp; | ||
186 | int i, j; | ||
187 | |||
188 | for (i = 0; i < fls->mirror_array_cnt - 1; i++) { | ||
189 | for (j = i + 1; j < fls->mirror_array_cnt; j++) | ||
190 | if (fls->mirror_array[i]->efficiency < | ||
191 | fls->mirror_array[j]->efficiency) { | ||
192 | tmp = fls->mirror_array[i]; | ||
193 | fls->mirror_array[i] = fls->mirror_array[j]; | ||
194 | fls->mirror_array[j] = tmp; | ||
195 | } | ||
196 | } | ||
197 | } | ||
198 | |||
199 | static struct pnfs_layout_segment * | ||
200 | ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, | ||
201 | struct nfs4_layoutget_res *lgr, | ||
202 | gfp_t gfp_flags) | ||
203 | { | ||
204 | struct pnfs_layout_segment *ret; | ||
205 | struct nfs4_ff_layout_segment *fls = NULL; | ||
206 | struct xdr_stream stream; | ||
207 | struct xdr_buf buf; | ||
208 | struct page *scratch; | ||
209 | u64 stripe_unit; | ||
210 | u32 mirror_array_cnt; | ||
211 | __be32 *p; | ||
212 | int i, rc; | ||
213 | |||
214 | dprintk("--> %s\n", __func__); | ||
215 | scratch = alloc_page(gfp_flags); | ||
216 | if (!scratch) | ||
217 | return ERR_PTR(-ENOMEM); | ||
218 | |||
219 | xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, | ||
220 | lgr->layoutp->len); | ||
221 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
222 | |||
223 | /* stripe unit and mirror_array_cnt */ | ||
224 | rc = -EIO; | ||
225 | p = xdr_inline_decode(&stream, 8 + 4); | ||
226 | if (!p) | ||
227 | goto out_err_free; | ||
228 | |||
229 | p = xdr_decode_hyper(p, &stripe_unit); | ||
230 | mirror_array_cnt = be32_to_cpup(p++); | ||
231 | dprintk("%s: stripe_unit=%llu mirror_array_cnt=%u\n", __func__, | ||
232 | stripe_unit, mirror_array_cnt); | ||
233 | |||
234 | if (mirror_array_cnt > NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT || | ||
235 | mirror_array_cnt == 0) | ||
236 | goto out_err_free; | ||
237 | |||
238 | rc = -ENOMEM; | ||
239 | fls = kzalloc(sizeof(*fls), gfp_flags); | ||
240 | if (!fls) | ||
241 | goto out_err_free; | ||
242 | |||
243 | fls->mirror_array_cnt = mirror_array_cnt; | ||
244 | fls->stripe_unit = stripe_unit; | ||
245 | fls->mirror_array = kcalloc(fls->mirror_array_cnt, | ||
246 | sizeof(fls->mirror_array[0]), gfp_flags); | ||
247 | if (fls->mirror_array == NULL) | ||
248 | goto out_err_free; | ||
249 | |||
250 | for (i = 0; i < fls->mirror_array_cnt; i++) { | ||
251 | struct nfs4_deviceid devid; | ||
252 | struct nfs4_deviceid_node *idnode; | ||
253 | u32 ds_count; | ||
254 | u32 fh_count; | ||
255 | int j; | ||
256 | |||
257 | rc = -EIO; | ||
258 | p = xdr_inline_decode(&stream, 4); | ||
259 | if (!p) | ||
260 | goto out_err_free; | ||
261 | ds_count = be32_to_cpup(p); | ||
262 | |||
263 | /* FIXME: allow for striping? */ | ||
264 | if (ds_count != 1) | ||
265 | goto out_err_free; | ||
266 | |||
267 | fls->mirror_array[i] = | ||
268 | kzalloc(sizeof(struct nfs4_ff_layout_mirror), | ||
269 | gfp_flags); | ||
270 | if (fls->mirror_array[i] == NULL) { | ||
271 | rc = -ENOMEM; | ||
272 | goto out_err_free; | ||
273 | } | ||
274 | |||
275 | spin_lock_init(&fls->mirror_array[i]->lock); | ||
276 | fls->mirror_array[i]->ds_count = ds_count; | ||
277 | |||
278 | /* deviceid */ | ||
279 | rc = decode_deviceid(&stream, &devid); | ||
280 | if (rc) | ||
281 | goto out_err_free; | ||
282 | |||
283 | idnode = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), | ||
284 | &devid, lh->plh_lc_cred, | ||
285 | gfp_flags); | ||
286 | /* | ||
287 | * upon success, mirror_ds is allocated by previous | ||
288 | * getdeviceinfo, or newly by .alloc_deviceid_node | ||
289 | * nfs4_find_get_deviceid failure is indeed getdeviceinfo falure | ||
290 | */ | ||
291 | if (idnode) | ||
292 | fls->mirror_array[i]->mirror_ds = | ||
293 | FF_LAYOUT_MIRROR_DS(idnode); | ||
294 | else | ||
295 | goto out_err_free; | ||
296 | |||
297 | /* efficiency */ | ||
298 | rc = -EIO; | ||
299 | p = xdr_inline_decode(&stream, 4); | ||
300 | if (!p) | ||
301 | goto out_err_free; | ||
302 | fls->mirror_array[i]->efficiency = be32_to_cpup(p); | ||
303 | |||
304 | /* stateid */ | ||
305 | rc = decode_stateid(&stream, &fls->mirror_array[i]->stateid); | ||
306 | if (rc) | ||
307 | goto out_err_free; | ||
308 | |||
309 | /* fh */ | ||
310 | p = xdr_inline_decode(&stream, 4); | ||
311 | if (!p) | ||
312 | goto out_err_free; | ||
313 | fh_count = be32_to_cpup(p); | ||
314 | |||
315 | fls->mirror_array[i]->fh_versions = | ||
316 | kzalloc(fh_count * sizeof(struct nfs_fh), | ||
317 | gfp_flags); | ||
318 | if (fls->mirror_array[i]->fh_versions == NULL) { | ||
319 | rc = -ENOMEM; | ||
320 | goto out_err_free; | ||
321 | } | ||
322 | |||
323 | for (j = 0; j < fh_count; j++) { | ||
324 | rc = decode_nfs_fh(&stream, | ||
325 | &fls->mirror_array[i]->fh_versions[j]); | ||
326 | if (rc) | ||
327 | goto out_err_free; | ||
328 | } | ||
329 | |||
330 | fls->mirror_array[i]->fh_versions_cnt = fh_count; | ||
331 | |||
332 | /* user */ | ||
333 | rc = decode_name(&stream, &fls->mirror_array[i]->uid); | ||
334 | if (rc) | ||
335 | goto out_err_free; | ||
336 | |||
337 | /* group */ | ||
338 | rc = decode_name(&stream, &fls->mirror_array[i]->gid); | ||
339 | if (rc) | ||
340 | goto out_err_free; | ||
341 | |||
342 | dprintk("%s: uid %d gid %d\n", __func__, | ||
343 | fls->mirror_array[i]->uid, | ||
344 | fls->mirror_array[i]->gid); | ||
345 | } | ||
346 | |||
347 | ff_layout_sort_mirrors(fls); | ||
348 | rc = ff_layout_check_layout(lgr); | ||
349 | if (rc) | ||
350 | goto out_err_free; | ||
351 | |||
352 | ret = &fls->generic_hdr; | ||
353 | dprintk("<-- %s (success)\n", __func__); | ||
354 | out_free_page: | ||
355 | __free_page(scratch); | ||
356 | return ret; | ||
357 | out_err_free: | ||
358 | _ff_layout_free_lseg(fls); | ||
359 | ret = ERR_PTR(rc); | ||
360 | dprintk("<-- %s (%d)\n", __func__, rc); | ||
361 | goto out_free_page; | ||
362 | } | ||
363 | |||
364 | static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout) | ||
365 | { | ||
366 | struct pnfs_layout_segment *lseg; | ||
367 | |||
368 | list_for_each_entry(lseg, &layout->plh_segs, pls_list) | ||
369 | if (lseg->pls_range.iomode == IOMODE_RW) | ||
370 | return true; | ||
371 | |||
372 | return false; | ||
373 | } | ||
374 | |||
375 | static void | ||
376 | ff_layout_free_lseg(struct pnfs_layout_segment *lseg) | ||
377 | { | ||
378 | struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); | ||
379 | int i; | ||
380 | |||
381 | dprintk("--> %s\n", __func__); | ||
382 | |||
383 | for (i = 0; i < fls->mirror_array_cnt; i++) { | ||
384 | if (fls->mirror_array[i]) { | ||
385 | nfs4_ff_layout_put_deviceid(fls->mirror_array[i]->mirror_ds); | ||
386 | fls->mirror_array[i]->mirror_ds = NULL; | ||
387 | if (fls->mirror_array[i]->cred) { | ||
388 | put_rpccred(fls->mirror_array[i]->cred); | ||
389 | fls->mirror_array[i]->cred = NULL; | ||
390 | } | ||
391 | } | ||
392 | } | ||
393 | |||
394 | if (lseg->pls_range.iomode == IOMODE_RW) { | ||
395 | struct nfs4_flexfile_layout *ffl; | ||
396 | struct inode *inode; | ||
397 | |||
398 | ffl = FF_LAYOUT_FROM_HDR(lseg->pls_layout); | ||
399 | inode = ffl->generic_hdr.plh_inode; | ||
400 | spin_lock(&inode->i_lock); | ||
401 | if (!ff_layout_has_rw_segments(lseg->pls_layout)) { | ||
402 | ffl->commit_info.nbuckets = 0; | ||
403 | kfree(ffl->commit_info.buckets); | ||
404 | ffl->commit_info.buckets = NULL; | ||
405 | } | ||
406 | spin_unlock(&inode->i_lock); | ||
407 | } | ||
408 | _ff_layout_free_lseg(fls); | ||
409 | } | ||
410 | |||
411 | /* Return 1 until we have multiple lsegs support */ | ||
412 | static int | ||
413 | ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls) | ||
414 | { | ||
415 | return 1; | ||
416 | } | ||
417 | |||
418 | static int | ||
419 | ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, | ||
420 | struct nfs_commit_info *cinfo, | ||
421 | gfp_t gfp_flags) | ||
422 | { | ||
423 | struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); | ||
424 | struct pnfs_commit_bucket *buckets; | ||
425 | int size; | ||
426 | |||
427 | if (cinfo->ds->nbuckets != 0) { | ||
428 | /* This assumes there is only one RW lseg per file. | ||
429 | * To support multiple lseg per file, we need to | ||
430 | * change struct pnfs_commit_bucket to allow dynamic | ||
431 | * increasing nbuckets. | ||
432 | */ | ||
433 | return 0; | ||
434 | } | ||
435 | |||
436 | size = ff_layout_get_lseg_count(fls) * FF_LAYOUT_MIRROR_COUNT(lseg); | ||
437 | |||
438 | buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), | ||
439 | gfp_flags); | ||
440 | if (!buckets) | ||
441 | return -ENOMEM; | ||
442 | else { | ||
443 | int i; | ||
444 | |||
445 | spin_lock(cinfo->lock); | ||
446 | if (cinfo->ds->nbuckets != 0) | ||
447 | kfree(buckets); | ||
448 | else { | ||
449 | cinfo->ds->buckets = buckets; | ||
450 | cinfo->ds->nbuckets = size; | ||
451 | for (i = 0; i < size; i++) { | ||
452 | INIT_LIST_HEAD(&buckets[i].written); | ||
453 | INIT_LIST_HEAD(&buckets[i].committing); | ||
454 | /* mark direct verifier as unset */ | ||
455 | buckets[i].direct_verf.committed = | ||
456 | NFS_INVALID_STABLE_HOW; | ||
457 | } | ||
458 | } | ||
459 | spin_unlock(cinfo->lock); | ||
460 | return 0; | ||
461 | } | ||
462 | } | ||
463 | |||
464 | static struct nfs4_pnfs_ds * | ||
465 | ff_layout_choose_best_ds_for_read(struct nfs_pageio_descriptor *pgio, | ||
466 | int *best_idx) | ||
467 | { | ||
468 | struct nfs4_ff_layout_segment *fls; | ||
469 | struct nfs4_pnfs_ds *ds; | ||
470 | int idx; | ||
471 | |||
472 | fls = FF_LAYOUT_LSEG(pgio->pg_lseg); | ||
473 | /* mirrors are sorted by efficiency */ | ||
474 | for (idx = 0; idx < fls->mirror_array_cnt; idx++) { | ||
475 | ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, idx, false); | ||
476 | if (ds) { | ||
477 | *best_idx = idx; | ||
478 | return ds; | ||
479 | } | ||
480 | } | ||
481 | |||
482 | return NULL; | ||
483 | } | ||
484 | |||
485 | static void | ||
486 | ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, | ||
487 | struct nfs_page *req) | ||
488 | { | ||
489 | struct nfs_pgio_mirror *pgm; | ||
490 | struct nfs4_ff_layout_mirror *mirror; | ||
491 | struct nfs4_pnfs_ds *ds; | ||
492 | int ds_idx; | ||
493 | |||
494 | /* Use full layout for now */ | ||
495 | if (!pgio->pg_lseg) | ||
496 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
497 | req->wb_context, | ||
498 | 0, | ||
499 | NFS4_MAX_UINT64, | ||
500 | IOMODE_READ, | ||
501 | GFP_KERNEL); | ||
502 | /* If no lseg, fall back to read through mds */ | ||
503 | if (pgio->pg_lseg == NULL) | ||
504 | goto out_mds; | ||
505 | |||
506 | ds = ff_layout_choose_best_ds_for_read(pgio, &ds_idx); | ||
507 | if (!ds) | ||
508 | goto out_mds; | ||
509 | mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); | ||
510 | |||
511 | pgio->pg_mirror_idx = ds_idx; | ||
512 | |||
513 | /* read always uses only one mirror - idx 0 for pgio layer */ | ||
514 | pgm = &pgio->pg_mirrors[0]; | ||
515 | pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; | ||
516 | |||
517 | return; | ||
518 | out_mds: | ||
519 | pnfs_put_lseg(pgio->pg_lseg); | ||
520 | pgio->pg_lseg = NULL; | ||
521 | nfs_pageio_reset_read_mds(pgio); | ||
522 | } | ||
523 | |||
524 | static void | ||
525 | ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, | ||
526 | struct nfs_page *req) | ||
527 | { | ||
528 | struct nfs4_ff_layout_mirror *mirror; | ||
529 | struct nfs_pgio_mirror *pgm; | ||
530 | struct nfs_commit_info cinfo; | ||
531 | struct nfs4_pnfs_ds *ds; | ||
532 | int i; | ||
533 | int status; | ||
534 | |||
535 | if (!pgio->pg_lseg) | ||
536 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
537 | req->wb_context, | ||
538 | 0, | ||
539 | NFS4_MAX_UINT64, | ||
540 | IOMODE_RW, | ||
541 | GFP_NOFS); | ||
542 | /* If no lseg, fall back to write through mds */ | ||
543 | if (pgio->pg_lseg == NULL) | ||
544 | goto out_mds; | ||
545 | |||
546 | nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); | ||
547 | status = ff_layout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); | ||
548 | if (status < 0) | ||
549 | goto out_mds; | ||
550 | |||
551 | /* Use a direct mapping of ds_idx to pgio mirror_idx */ | ||
552 | if (WARN_ON_ONCE(pgio->pg_mirror_count != | ||
553 | FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))) | ||
554 | goto out_mds; | ||
555 | |||
556 | for (i = 0; i < pgio->pg_mirror_count; i++) { | ||
557 | ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true); | ||
558 | if (!ds) | ||
559 | goto out_mds; | ||
560 | pgm = &pgio->pg_mirrors[i]; | ||
561 | mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); | ||
562 | pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize; | ||
563 | } | ||
564 | |||
565 | return; | ||
566 | |||
567 | out_mds: | ||
568 | pnfs_put_lseg(pgio->pg_lseg); | ||
569 | pgio->pg_lseg = NULL; | ||
570 | nfs_pageio_reset_write_mds(pgio); | ||
571 | } | ||
572 | |||
573 | static unsigned int | ||
574 | ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio, | ||
575 | struct nfs_page *req) | ||
576 | { | ||
577 | if (!pgio->pg_lseg) | ||
578 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
579 | req->wb_context, | ||
580 | 0, | ||
581 | NFS4_MAX_UINT64, | ||
582 | IOMODE_RW, | ||
583 | GFP_NOFS); | ||
584 | if (pgio->pg_lseg) | ||
585 | return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg); | ||
586 | |||
587 | /* no lseg means that pnfs is not in use, so no mirroring here */ | ||
588 | pnfs_put_lseg(pgio->pg_lseg); | ||
589 | pgio->pg_lseg = NULL; | ||
590 | nfs_pageio_reset_write_mds(pgio); | ||
591 | return 1; | ||
592 | } | ||
593 | |||
/* Page I/O operations for pNFS reads through the flexfiles layout */
static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
	.pg_init = ff_layout_pg_init_read,
	.pg_test = pnfs_generic_pg_test,
	.pg_doio = pnfs_generic_pg_readpages,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};
600 | |||
/* Page I/O operations for pNFS writes through the flexfiles layout */
static const struct nfs_pageio_ops ff_layout_pg_write_ops = {
	.pg_init = ff_layout_pg_init_write,
	.pg_test = pnfs_generic_pg_test,
	.pg_doio = pnfs_generic_pg_writepages,
	.pg_get_mirror_count = ff_layout_pg_get_mirror_count_write,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};
608 | |||
/*
 * Reset a failed WRITE for retry, either through pNFS (when
 * @retry_pnfs is true) or through the MDS.  A layoutcommit is sent
 * first, since the DS may already have committed part of the I/O.
 */
static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
{
	struct rpc_task *task = &hdr->task;

	pnfs_layoutcommit_inode(hdr->inode, false);

	if (retry_pnfs) {
		dprintk("%s Reset task %5u for i/o through pNFS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		if (!hdr->dreq) {
			struct nfs_open_context *ctx;

			/* Buffered I/O: flag the open context for resend and
			 * let the generic completion path clean up the pages.
			 */
			ctx = nfs_list_entry(hdr->pages.next)->wb_context;
			set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
			hdr->completion_ops->error_cleanup(&hdr->pages);
		} else {
			nfs_direct_set_resched_writes(hdr->dreq);
			/* fake unstable write to let common nfs resend pages */
			hdr->verf.committed = NFS_UNSTABLE;
			hdr->good_bytes = 0;
		}
		return;
	}

	/* NFS_IOHDR_REDO guards against resending the same header twice */
	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		dprintk("%s Reset task %5u for i/o through MDS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		task->tk_status = pnfs_write_done_resend_to_mds(hdr);
	}
}
651 | |||
/*
 * Resend a failed READ through the MDS.  A layoutcommit is sent first;
 * NFS_IOHDR_REDO guards against resending the same header twice.
 */
static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
{
	struct rpc_task *task = &hdr->task;

	pnfs_layoutcommit_inode(hdr->inode, false);

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		dprintk("%s Reset task %5u for i/o through MDS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		task->tk_status = pnfs_read_done_resend_to_mds(hdr);
	}
}
670 | |||
/*
 * Handle an NFSv4 error returned by a DS operation.  Return values:
 *   0                      - no error, or terminal failure (tk_status set)
 *   -NFS4ERR_RESET_TO_PNFS - resend through the pNFS path
 *   -NFS4ERR_RESET_TO_MDS  - resend through the MDS
 *   -EAGAIN                - restart the RPC (tk_status cleared)
 */
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
					   struct nfs4_state *state,
					   struct nfs_client *clp,
					   struct pnfs_layout_segment *lseg,
					   int idx)
{
	struct pnfs_layout_hdr *lo = lseg->pls_layout;
	struct inode *inode = lo->plh_inode;
	struct nfs_server *mds_server = NFS_SERVER(inode);

	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
	struct nfs_client *mds_client = mds_server->nfs_client;
	struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;

	if (task->tk_status >= 0)
		return 0;

	switch (task->tk_status) {
	/* MDS state errors */
	case -NFS4ERR_DELEG_REVOKED:
	case -NFS4ERR_ADMIN_REVOKED:
	case -NFS4ERR_BAD_STATEID:
		if (state == NULL)
			break;
		nfs_remove_bad_delegation(state->inode);
		/* Fall through: recover the stateid as for OPENMODE */
	case -NFS4ERR_OPENMODE:
		if (state == NULL)
			break;
		if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
			goto out_bad_stateid;
		goto wait_on_recovery;
	case -NFS4ERR_EXPIRED:
		if (state != NULL) {
			if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
				goto out_bad_stateid;
		}
		nfs4_schedule_lease_recovery(mds_client);
		goto wait_on_recovery;
	/* DS session errors */
	case -NFS4ERR_BADSESSION:
	case -NFS4ERR_BADSLOT:
	case -NFS4ERR_BAD_HIGH_SLOT:
	case -NFS4ERR_DEADSESSION:
	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
	case -NFS4ERR_SEQ_FALSE_RETRY:
	case -NFS4ERR_SEQ_MISORDERED:
		dprintk("%s ERROR %d, Reset session. Exchangeid "
			"flags 0x%x\n", __func__, task->tk_status,
			clp->cl_exchange_flags);
		nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
		break;
	case -NFS4ERR_DELAY:
	case -NFS4ERR_GRACE:
		/* Server asked us to back off; delay then fall to -EAGAIN */
		rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
		break;
	case -NFS4ERR_RETRY_UNCACHED_REP:
		break;
	/* Invalidate Layout errors */
	case -NFS4ERR_PNFS_NO_LAYOUT:
	case -ESTALE:           /* mapped NFS4ERR_STALE */
	case -EBADHANDLE:       /* mapped NFS4ERR_BADHANDLE */
	case -EISDIR:           /* mapped NFS4ERR_ISDIR */
	case -NFS4ERR_FHEXPIRED:
	case -NFS4ERR_WRONG_TYPE:
		dprintk("%s Invalid layout error %d\n", __func__,
			task->tk_status);
		/*
		 * Destroy layout so new i/o will get a new layout.
		 * Layout will not be destroyed until all current lseg
		 * references are put. Mark layout as invalid to resend failed
		 * i/o and all i/o waiting on the slot table to the MDS until
		 * layout is destroyed and a new valid layout is obtained.
		 */
		pnfs_destroy_layout(NFS_I(inode));
		rpc_wake_up(&tbl->slot_tbl_waitq);
		goto reset;
	/* RPC connection errors */
	case -ECONNREFUSED:
	case -EHOSTDOWN:
	case -EHOSTUNREACH:
	case -ENETUNREACH:
	case -EIO:
	case -ETIMEDOUT:
	case -EPIPE:
		dprintk("%s DS connection error %d\n", __func__,
			task->tk_status);
		nfs4_mark_deviceid_unavailable(devid);
		rpc_wake_up(&tbl->slot_tbl_waitq);
		/* fall through */
	default:
		/* Prefer another mirror if one is still reachable */
		if (ff_layout_has_available_ds(lseg))
			return -NFS4ERR_RESET_TO_PNFS;
reset:
		dprintk("%s Retry through MDS. Error %d\n", __func__,
			task->tk_status);
		return -NFS4ERR_RESET_TO_MDS;
	}
out:
	task->tk_status = 0;
	return -EAGAIN;
out_bad_stateid:
	task->tk_status = -EIO;
	return 0;
wait_on_recovery:
	/* Park the task until the state manager finishes, then restart it */
	rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
		rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
	goto out;
}
780 | |||
781 | /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ | ||
782 | static int ff_layout_async_handle_error_v3(struct rpc_task *task, | ||
783 | struct pnfs_layout_segment *lseg, | ||
784 | int idx) | ||
785 | { | ||
786 | struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); | ||
787 | |||
788 | if (task->tk_status >= 0) | ||
789 | return 0; | ||
790 | |||
791 | if (task->tk_status != -EJUKEBOX) { | ||
792 | dprintk("%s DS connection error %d\n", __func__, | ||
793 | task->tk_status); | ||
794 | nfs4_mark_deviceid_unavailable(devid); | ||
795 | if (ff_layout_has_available_ds(lseg)) | ||
796 | return -NFS4ERR_RESET_TO_PNFS; | ||
797 | else | ||
798 | return -NFS4ERR_RESET_TO_MDS; | ||
799 | } | ||
800 | |||
801 | if (task->tk_status == -EJUKEBOX) | ||
802 | nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); | ||
803 | task->tk_status = 0; | ||
804 | rpc_restart_call(task); | ||
805 | rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); | ||
806 | return -EAGAIN; | ||
807 | } | ||
808 | |||
809 | static int ff_layout_async_handle_error(struct rpc_task *task, | ||
810 | struct nfs4_state *state, | ||
811 | struct nfs_client *clp, | ||
812 | struct pnfs_layout_segment *lseg, | ||
813 | int idx) | ||
814 | { | ||
815 | int vers = clp->cl_nfs_mod->rpc_vers->number; | ||
816 | |||
817 | switch (vers) { | ||
818 | case 3: | ||
819 | return ff_layout_async_handle_error_v3(task, lseg, idx); | ||
820 | case 4: | ||
821 | return ff_layout_async_handle_error_v4(task, state, clp, | ||
822 | lseg, idx); | ||
823 | default: | ||
824 | /* should never happen */ | ||
825 | WARN_ON_ONCE(1); | ||
826 | return 0; | ||
827 | } | ||
828 | } | ||
829 | |||
/*
 * Record a DS I/O error against the mirror at @idx so it can be
 * reported back to the server (e.g. in a LAYOUTRETURN error list).
 */
static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
					int idx, u64 offset, u64 length,
					u32 status, int opnum)
{
	struct nfs4_ff_layout_mirror *mirror;
	int err;

	mirror = FF_LAYOUT_COMP(lseg, idx);
	err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
				       mirror, offset, length, status, opnum,
				       GFP_NOIO);
	dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status);
}
843 | |||
844 | /* NFS_PROTO call done callback routines */ | ||
845 | |||
/*
 * READ completion callback: record DS errors and decide whether the
 * I/O should be resent through pNFS, resent through the MDS, or the
 * RPC restarted.  Returns 0 when the result can be processed normally.
 */
static int ff_layout_read_done_cb(struct rpc_task *task,
				struct nfs_pgio_header *hdr)
{
	struct inode *inode;
	int err;

	trace_nfs4_pnfs_read(hdr, task->tk_status);
	/* A timeout with no op_status is treated as a dead DS */
	if (task->tk_status == -ETIMEDOUT && !hdr->res.op_status)
		hdr->res.op_status = NFS4ERR_NXIO;
	if (task->tk_status < 0 && hdr->res.op_status)
		ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
					    hdr->args.offset, hdr->args.count,
					    hdr->res.op_status, OP_READ);
	err = ff_layout_async_handle_error(task, hdr->args.context->state,
					   hdr->ds_clp, hdr->lseg,
					   hdr->pgio_mirror_idx);

	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
		/* Ask for the layout to be returned before close, then
		 * resend through the pNFS path.
		 */
		set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
			&hdr->lseg->pls_layout->plh_flags);
		pnfs_read_resend_pnfs(hdr);
		return task->tk_status;
	case -NFS4ERR_RESET_TO_MDS:
		inode = hdr->lseg->pls_layout->plh_inode;
		pnfs_error_mark_layout_for_return(inode, hdr->lseg);
		ff_layout_reset_read(hdr);
		return task->tk_status;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	return 0;
}
881 | |||
882 | /* | ||
883 | * We reference the rpc_cred of the first WRITE that triggers the need for | ||
884 | * a LAYOUTCOMMIT, and use it to send the layoutcommit compound. | ||
885 | * rfc5661 is not clear about which credential should be used. | ||
886 | * | ||
887 | * Flexlayout client should treat DS replied FILE_SYNC as DATA_SYNC, so | ||
888 | * to follow http://www.rfc-editor.org/errata_search.php?rfc=5661&eid=2751 | ||
889 | * we always send layoutcommit after DS writes. | ||
890 | */ | ||
891 | static void | ||
892 | ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr) | ||
893 | { | ||
894 | pnfs_set_layoutcommit(hdr); | ||
895 | dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, | ||
896 | (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); | ||
897 | } | ||
898 | |||
899 | static bool | ||
900 | ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx) | ||
901 | { | ||
902 | /* No mirroring for now */ | ||
903 | struct nfs4_deviceid_node *node = FF_LAYOUT_DEVID_NODE(lseg, idx); | ||
904 | |||
905 | return ff_layout_test_devid_unavailable(node); | ||
906 | } | ||
907 | |||
/*
 * Common READ rpc_call_prepare checks for v3 and v4 DSes.  Returns 0
 * when the RPC may proceed; otherwise the task has been exited and a
 * negative value is returned.
 */
static int ff_layout_read_prepare_common(struct rpc_task *task,
					 struct nfs_pgio_header *hdr)
{
	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return -EIO;
	}
	if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) {
		/* DS unavailable: redirect before issuing the RPC */
		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
		if (ff_layout_has_available_ds(hdr->lseg))
			pnfs_read_resend_pnfs(hdr);
		else
			ff_layout_reset_read(hdr);
		rpc_exit(task, 0);
		return -EAGAIN;
	}
	hdr->pgio_done_cb = ff_layout_read_done_cb;

	return 0;
}
928 | |||
929 | /* | ||
930 | * Call ops for the async read/write cases | ||
931 | * In the case of dense layouts, the offset needs to be reset to its | ||
932 | * original value. | ||
933 | */ | ||
/* READ rpc_call_prepare for an NFSv3 DS: no session setup needed */
static void ff_layout_read_prepare_v3(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_read_prepare_common(task, hdr) == 0)
		rpc_call_start(task);
}
943 | |||
944 | static int ff_layout_setup_sequence(struct nfs_client *ds_clp, | ||
945 | struct nfs4_sequence_args *args, | ||
946 | struct nfs4_sequence_res *res, | ||
947 | struct rpc_task *task) | ||
948 | { | ||
949 | if (ds_clp->cl_session) | ||
950 | return nfs41_setup_sequence(ds_clp->cl_session, | ||
951 | args, | ||
952 | res, | ||
953 | task); | ||
954 | return nfs40_setup_sequence(ds_clp->cl_slot_tbl, | ||
955 | args, | ||
956 | res, | ||
957 | task); | ||
958 | } | ||
959 | |||
/* READ rpc_call_prepare for an NFSv4 DS: sequence + stateid selection */
static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_read_prepare_common(task, hdr))
		return;

	if (ff_layout_setup_sequence(hdr->ds_clp,
				     &hdr->args.seq_args,
				     &hdr->res.seq_res,
				     task))
		return;

	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
			hdr->args.lock_context, FMODE_READ) == -EIO)
		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}
977 | |||
/*
 * READ rpc_call_done: if the header was flagged for redo, just close
 * out the sequence; otherwise hand off to the MDS completion callback.
 */
static void ff_layout_read_call_done(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
	    task->tk_status == 0) {
		nfs4_sequence_done(task, &hdr->res.seq_res);
		return;
	}

	/* Note this may cause RPC to be resent */
	hdr->mds_ops->rpc_call_done(task, hdr);
}
993 | |||
/* Fold this task's RPC timings into the per-mount READ iostats bucket */
static void ff_layout_read_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]);
}
1001 | |||
/*
 * WRITE completion callback: record DS errors, redirect the I/O to
 * pNFS or the MDS if required, and schedule a layoutcommit when the
 * DS replied FILE_SYNC or DATA_SYNC.
 */
static int ff_layout_write_done_cb(struct rpc_task *task,
				   struct nfs_pgio_header *hdr)
{
	struct inode *inode;
	int err;

	trace_nfs4_pnfs_write(hdr, task->tk_status);
	/* A timeout with no op_status is treated as a dead DS */
	if (task->tk_status == -ETIMEDOUT && !hdr->res.op_status)
		hdr->res.op_status = NFS4ERR_NXIO;
	if (task->tk_status < 0 && hdr->res.op_status)
		ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
					    hdr->args.offset, hdr->args.count,
					    hdr->res.op_status, OP_WRITE);
	err = ff_layout_async_handle_error(task, hdr->args.context->state,
					   hdr->ds_clp, hdr->lseg,
					   hdr->pgio_mirror_idx);

	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
	case -NFS4ERR_RESET_TO_MDS:
		inode = hdr->lseg->pls_layout->plh_inode;
		pnfs_error_mark_layout_for_return(inode, hdr->lseg);
		if (err == -NFS4ERR_RESET_TO_PNFS) {
			/* Keep retrying layoutget so the resend stays on pNFS */
			pnfs_set_retry_layoutget(hdr->lseg->pls_layout);
			ff_layout_reset_write(hdr, true);
		} else {
			pnfs_clear_retry_layoutget(hdr->lseg->pls_layout);
			ff_layout_reset_write(hdr, false);
		}
		return task->tk_status;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	if (hdr->res.verf->committed == NFS_FILE_SYNC ||
	    hdr->res.verf->committed == NFS_DATA_SYNC)
		ff_layout_set_layoutcommit(hdr);

	return 0;
}
1043 | |||
/*
 * COMMIT completion callback: record DS errors, prepare the bucket for
 * resend when the commit must be redirected, and schedule a
 * layoutcommit for unstable data that is now committed.
 */
static int ff_layout_commit_done_cb(struct rpc_task *task,
				    struct nfs_commit_data *data)
{
	struct inode *inode;
	int err;

	trace_nfs4_pnfs_commit_ds(data, task->tk_status);
	/* A timeout with no op_status is treated as a dead DS */
	if (task->tk_status == -ETIMEDOUT && !data->res.op_status)
		data->res.op_status = NFS4ERR_NXIO;
	if (task->tk_status < 0 && data->res.op_status)
		ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index,
					    data->args.offset, data->args.count,
					    data->res.op_status, OP_COMMIT);
	err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
					   data->lseg, data->ds_commit_index);

	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
	case -NFS4ERR_RESET_TO_MDS:
		inode = data->lseg->pls_layout->plh_inode;
		pnfs_error_mark_layout_for_return(inode, data->lseg);
		if (err == -NFS4ERR_RESET_TO_PNFS)
			pnfs_set_retry_layoutget(data->lseg->pls_layout);
		else
			pnfs_clear_retry_layoutget(data->lseg->pls_layout);
		pnfs_generic_prepare_to_resend_writes(data);
		return -EAGAIN;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	if (data->verf.committed == NFS_UNSTABLE)
		pnfs_commit_set_layoutcommit(data);

	return 0;
}
1081 | |||
/*
 * Common WRITE rpc_call_prepare checks for v3 and v4 DSes.  Returns 0
 * when the RPC may proceed; otherwise the task has been exited and a
 * negative value is returned.
 */
static int ff_layout_write_prepare_common(struct rpc_task *task,
					  struct nfs_pgio_header *hdr)
{
	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return -EIO;
	}

	if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) {
		bool retry_pnfs;

		/* DS unavailable: redirect before issuing the RPC */
		retry_pnfs = ff_layout_has_available_ds(hdr->lseg);
		dprintk("%s task %u reset io to %s\n", __func__,
			task->tk_pid, retry_pnfs ? "pNFS" : "MDS");
		ff_layout_reset_write(hdr, retry_pnfs);
		rpc_exit(task, 0);
		return -EAGAIN;
	}

	return 0;
}
1103 | |||
/* WRITE rpc_call_prepare for an NFSv3 DS: no session setup needed */
static void ff_layout_write_prepare_v3(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_write_prepare_common(task, hdr) == 0)
		rpc_call_start(task);
}
1113 | |||
/* WRITE rpc_call_prepare for an NFSv4 DS: sequence + stateid selection */
static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_write_prepare_common(task, hdr))
		return;

	if (ff_layout_setup_sequence(hdr->ds_clp,
				     &hdr->args.seq_args,
				     &hdr->res.seq_res,
				     task))
		return;

	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
			hdr->args.lock_context, FMODE_WRITE) == -EIO)
		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}
1131 | |||
/*
 * WRITE rpc_call_done: if the header was flagged for redo, just close
 * out the sequence; otherwise hand off to the MDS completion callback.
 */
static void ff_layout_write_call_done(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
	    task->tk_status == 0) {
		nfs4_sequence_done(task, &hdr->res.seq_res);
		return;
	}

	/* Note this may cause RPC to be resent */
	hdr->mds_ops->rpc_call_done(task, hdr);
}
1145 | |||
/* Fold this task's RPC timings into the per-mount WRITE iostats bucket */
static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
}
1153 | |||
/* NFSv3 DSes have no sessions: a COMMIT can be started immediately */
static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
{
	rpc_call_start(task);
}
1158 | |||
1159 | static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data) | ||
1160 | { | ||
1161 | struct nfs_commit_data *wdata = data; | ||
1162 | |||
1163 | ff_layout_setup_sequence(wdata->ds_clp, | ||
1164 | &wdata->args.seq_args, | ||
1165 | &wdata->res.seq_res, | ||
1166 | task); | ||
1167 | } | ||
1168 | |||
/* Fold this task's RPC timings into the per-mount COMMIT iostats bucket */
static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *cdata = data;

	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]);
}
1176 | |||
/* RPC callbacks for READs to an NFSv3 data server */
static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_read_prepare_v3,
	.rpc_call_done = ff_layout_read_call_done,
	.rpc_count_stats = ff_layout_read_count_stats,
	.rpc_release = pnfs_generic_rw_release,
};
1183 | |||
/* RPC callbacks for READs to an NFSv4 data server */
static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_read_prepare_v4,
	.rpc_call_done = ff_layout_read_call_done,
	.rpc_count_stats = ff_layout_read_count_stats,
	.rpc_release = pnfs_generic_rw_release,
};
1190 | |||
/* RPC callbacks for WRITEs to an NFSv3 data server */
static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_write_prepare_v3,
	.rpc_call_done = ff_layout_write_call_done,
	.rpc_count_stats = ff_layout_write_count_stats,
	.rpc_release = pnfs_generic_rw_release,
};
1197 | |||
/* RPC callbacks for WRITEs to an NFSv4 data server */
static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_write_prepare_v4,
	.rpc_call_done = ff_layout_write_call_done,
	.rpc_count_stats = ff_layout_write_count_stats,
	.rpc_release = pnfs_generic_rw_release,
};
1204 | |||
/* RPC callbacks for COMMITs to an NFSv3 data server */
static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_commit_prepare_v3,
	.rpc_call_done = pnfs_generic_write_commit_done,
	.rpc_count_stats = ff_layout_commit_count_stats,
	.rpc_release = pnfs_generic_commit_release,
};
1211 | |||
/* RPC callbacks for COMMITs to an NFSv4 data server */
static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_commit_prepare_v4,
	.rpc_call_done = pnfs_generic_write_commit_done,
	.rpc_count_stats = ff_layout_commit_count_stats,
	.rpc_release = pnfs_generic_commit_release,
};
1218 | |||
1219 | static enum pnfs_try_status | ||
1220 | ff_layout_read_pagelist(struct nfs_pgio_header *hdr) | ||
1221 | { | ||
1222 | struct pnfs_layout_segment *lseg = hdr->lseg; | ||
1223 | struct nfs4_pnfs_ds *ds; | ||
1224 | struct rpc_clnt *ds_clnt; | ||
1225 | struct rpc_cred *ds_cred; | ||
1226 | loff_t offset = hdr->args.offset; | ||
1227 | u32 idx = hdr->pgio_mirror_idx; | ||
1228 | int vers; | ||
1229 | struct nfs_fh *fh; | ||
1230 | |||
1231 | dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", | ||
1232 | __func__, hdr->inode->i_ino, | ||
1233 | hdr->args.pgbase, (size_t)hdr->args.count, offset); | ||
1234 | |||
1235 | ds = nfs4_ff_layout_prepare_ds(lseg, idx, false); | ||
1236 | if (!ds) | ||
1237 | goto out_failed; | ||
1238 | |||
1239 | ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, | ||
1240 | hdr->inode); | ||
1241 | if (IS_ERR(ds_clnt)) | ||
1242 | goto out_failed; | ||
1243 | |||
1244 | ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred); | ||
1245 | if (IS_ERR(ds_cred)) | ||
1246 | goto out_failed; | ||
1247 | |||
1248 | vers = nfs4_ff_layout_ds_version(lseg, idx); | ||
1249 | |||
1250 | dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__, | ||
1251 | ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers); | ||
1252 | |||
1253 | atomic_inc(&ds->ds_clp->cl_count); | ||
1254 | hdr->ds_clp = ds->ds_clp; | ||
1255 | fh = nfs4_ff_layout_select_ds_fh(lseg, idx); | ||
1256 | if (fh) | ||
1257 | hdr->args.fh = fh; | ||
1258 | |||
1259 | /* | ||
1260 | * Note that if we ever decide to split across DSes, | ||
1261 | * then we may need to handle dense-like offsets. | ||
1262 | */ | ||
1263 | hdr->args.offset = offset; | ||
1264 | hdr->mds_offset = offset; | ||
1265 | |||
1266 | /* Perform an asynchronous read to ds */ | ||
1267 | nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops, | ||
1268 | vers == 3 ? &ff_layout_read_call_ops_v3 : | ||
1269 | &ff_layout_read_call_ops_v4, | ||
1270 | 0, RPC_TASK_SOFTCONN); | ||
1271 | |||
1272 | return PNFS_ATTEMPTED; | ||
1273 | |||
1274 | out_failed: | ||
1275 | if (ff_layout_has_available_ds(lseg)) | ||
1276 | return PNFS_TRY_AGAIN; | ||
1277 | return PNFS_NOT_ATTEMPTED; | ||
1278 | } | ||
1279 | |||
1280 | /* Perform async writes. */ | ||
1281 | static enum pnfs_try_status | ||
1282 | ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) | ||
1283 | { | ||
1284 | struct pnfs_layout_segment *lseg = hdr->lseg; | ||
1285 | struct nfs4_pnfs_ds *ds; | ||
1286 | struct rpc_clnt *ds_clnt; | ||
1287 | struct rpc_cred *ds_cred; | ||
1288 | loff_t offset = hdr->args.offset; | ||
1289 | int vers; | ||
1290 | struct nfs_fh *fh; | ||
1291 | int idx = hdr->pgio_mirror_idx; | ||
1292 | |||
1293 | ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); | ||
1294 | if (!ds) | ||
1295 | return PNFS_NOT_ATTEMPTED; | ||
1296 | |||
1297 | ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, | ||
1298 | hdr->inode); | ||
1299 | if (IS_ERR(ds_clnt)) | ||
1300 | return PNFS_NOT_ATTEMPTED; | ||
1301 | |||
1302 | ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred); | ||
1303 | if (IS_ERR(ds_cred)) | ||
1304 | return PNFS_NOT_ATTEMPTED; | ||
1305 | |||
1306 | vers = nfs4_ff_layout_ds_version(lseg, idx); | ||
1307 | |||
1308 | dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d vers %d\n", | ||
1309 | __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, | ||
1310 | offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), | ||
1311 | vers); | ||
1312 | |||
1313 | hdr->pgio_done_cb = ff_layout_write_done_cb; | ||
1314 | atomic_inc(&ds->ds_clp->cl_count); | ||
1315 | hdr->ds_clp = ds->ds_clp; | ||
1316 | hdr->ds_commit_idx = idx; | ||
1317 | fh = nfs4_ff_layout_select_ds_fh(lseg, idx); | ||
1318 | if (fh) | ||
1319 | hdr->args.fh = fh; | ||
1320 | |||
1321 | /* | ||
1322 | * Note that if we ever decide to split across DSes, | ||
1323 | * then we may need to handle dense-like offsets. | ||
1324 | */ | ||
1325 | hdr->args.offset = offset; | ||
1326 | |||
1327 | /* Perform an asynchronous write */ | ||
1328 | nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops, | ||
1329 | vers == 3 ? &ff_layout_write_call_ops_v3 : | ||
1330 | &ff_layout_write_call_ops_v4, | ||
1331 | sync, RPC_TASK_SOFTCONN); | ||
1332 | return PNFS_ATTEMPTED; | ||
1333 | } | ||
1334 | |||
/*
 * Queue @req on the commit bucket for @ds_commit_idx so it is later
 * committed to that DS.  The whole update happens under cinfo->lock so
 * the bucket list and the mds/ds counters stay consistent.
 */
static void
ff_layout_mark_request_commit(struct nfs_page *req,
			      struct pnfs_layout_segment *lseg,
			      struct nfs_commit_info *cinfo,
			      u32 ds_commit_idx)
{
	struct list_head *list;
	struct pnfs_commit_bucket *buckets;

	spin_lock(cinfo->lock);
	buckets = cinfo->ds->buckets;
	list = &buckets[ds_commit_idx].written;
	if (list_empty(list)) {
		/* Non-empty buckets hold a reference on the lseg.  That ref
		 * is normally transferred to the COMMIT call and released
		 * there.  It could also be released if the last req is pulled
		 * off due to a rewrite, in which case it will be done in
		 * pnfs_common_clear_request_commit
		 */
		WARN_ON_ONCE(buckets[ds_commit_idx].wlseg != NULL);
		buckets[ds_commit_idx].wlseg = pnfs_get_lseg(lseg);
	}
	set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
	cinfo->ds->nwritten++;

	/* nfs_request_add_commit_list(). We need to add req to list without
	 * dropping cinfo lock.
	 */
	set_bit(PG_CLEAN, &(req)->wb_flags);
	nfs_list_add_request(req, list);
	cinfo->mds->ncommit++;
	spin_unlock(cinfo->lock);
	if (!cinfo->dreq) {
		/* Account the page as unstable for writeback throttling */
		inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
		inc_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host),
			     BDI_RECLAIMABLE);
		__mark_inode_dirty(req->wb_context->dentry->d_inode,
				   I_DIRTY_DATASYNC);
	}
}
1375 | |||
/* Direct 1:1 mapping from commit bucket index to DS/mirror index */
static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	return i;
}
1380 | |||
1381 | static struct nfs_fh * | ||
1382 | select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i) | ||
1383 | { | ||
1384 | struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg); | ||
1385 | |||
1386 | /* FIXME: Assume that there is only one NFS version available | ||
1387 | * for the DS. | ||
1388 | */ | ||
1389 | return &flseg->mirror_array[i]->fh_versions[0]; | ||
1390 | } | ||
1391 | |||
/*
 * Issue a COMMIT to the DS backing the commit bucket in @data.  On any
 * DS setup failure the writes are requeued for resend through the MDS
 * and -EAGAIN is returned.
 */
static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
{
	struct pnfs_layout_segment *lseg = data->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	struct rpc_cred *ds_cred;
	u32 idx;
	int vers;
	struct nfs_fh *fh;

	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
	ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
	if (!ds)
		goto out_err;

	ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
						   data->inode);
	if (IS_ERR(ds_clnt))
		goto out_err;

	ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred);
	if (IS_ERR(ds_cred))
		goto out_err;

	vers = nfs4_ff_layout_ds_version(lseg, idx);

	dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
		data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count),
		vers);
	data->commit_done_cb = ff_layout_commit_done_cb;
	data->cred = ds_cred;
	/* The commit data holds a reference on the DS client until release */
	atomic_inc(&ds->ds_clp->cl_count);
	data->ds_clp = ds->ds_clp;
	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
	if (fh)
		data->args.fh = fh;
	return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
				   vers == 3 ? &ff_layout_commit_call_ops_v3 :
					       &ff_layout_commit_call_ops_v4,
				   how, RPC_TASK_SOFTCONN);
out_err:
	pnfs_generic_prepare_to_resend_writes(data);
	pnfs_generic_commit_release(data);
	return -EAGAIN;
}
1437 | |||
/*
 * ->commit_pagelist: delegate to the generic pNFS commit machinery,
 * supplying ff_layout_initiate_commit to run the per-DS commits.
 */
static int
ff_layout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
			  int how, struct nfs_commit_info *cinfo)
{
	return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo,
					    ff_layout_initiate_commit);
}
1445 | |||
1446 | static struct pnfs_ds_commit_info * | ||
1447 | ff_layout_get_ds_info(struct inode *inode) | ||
1448 | { | ||
1449 | struct pnfs_layout_hdr *layout = NFS_I(inode)->layout; | ||
1450 | |||
1451 | if (layout == NULL) | ||
1452 | return NULL; | ||
1453 | |||
1454 | return &FF_LAYOUT_FROM_HDR(layout)->commit_info; | ||
1455 | } | ||
1456 | |||
/*
 * ->free_deviceid_node: recover the nfs4_ff_layout_ds that embeds @d
 * and release it.
 * NOTE(review): "deveiceid" is a misspelling of "deviceid"; renaming
 * would also touch the ops table below, so it is left as-is here.
 */
static void
ff_layout_free_deveiceid_node(struct nfs4_deviceid_node *d)
{
	nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds,
						  id_node));
}
1463 | |||
/*
 * Encode the ioerr array of a flexfile LAYOUTRETURN body.
 *
 * Reserves one word for the array count, encodes every tracked DS
 * error that intersects args->range (under the inode's i_lock, which
 * protects the error list), then back-patches the count.  Returns 0,
 * -E2BIG when even the count word does not fit, or the error from
 * ff_layout_encode_ds_ioerr().
 */
static int ff_layout_encode_ioerr(struct nfs4_flexfile_layout *flo,
				  struct xdr_stream *xdr,
				  const struct nfs4_layoutreturn_args *args)
{
	struct pnfs_layout_hdr *hdr = &flo->generic_hdr;
	__be32 *start;
	int count = 0, ret = 0;

	start = xdr_reserve_space(xdr, 4);
	if (unlikely(!start))
		return -E2BIG;

	/* This assume we always return _ALL_ layouts */
	spin_lock(&hdr->plh_inode->i_lock);
	ret = ff_layout_encode_ds_ioerr(flo, xdr, &count, &args->range);
	spin_unlock(&hdr->plh_inode->i_lock);

	/* back-patch the number of entries actually encoded */
	*start = cpu_to_be32(count);

	return ret;
}
1485 | |||
1486 | /* report nothing for now */ | ||
1487 | static void ff_layout_encode_iostats(struct nfs4_flexfile_layout *flo, | ||
1488 | struct xdr_stream *xdr, | ||
1489 | const struct nfs4_layoutreturn_args *args) | ||
1490 | { | ||
1491 | __be32 *p; | ||
1492 | |||
1493 | p = xdr_reserve_space(xdr, 4); | ||
1494 | if (likely(p)) | ||
1495 | *p = cpu_to_be32(0); | ||
1496 | } | ||
1497 | |||
1498 | static struct nfs4_deviceid_node * | ||
1499 | ff_layout_alloc_deviceid_node(struct nfs_server *server, | ||
1500 | struct pnfs_device *pdev, gfp_t gfp_flags) | ||
1501 | { | ||
1502 | struct nfs4_ff_layout_ds *dsaddr; | ||
1503 | |||
1504 | dsaddr = nfs4_ff_alloc_deviceid_node(server, pdev, gfp_flags); | ||
1505 | if (!dsaddr) | ||
1506 | return NULL; | ||
1507 | return &dsaddr->id_node; | ||
1508 | } | ||
1509 | |||
/*
 * ->encode_layoutreturn: emit the flexfile lrf_body opaque.
 *
 * The first word is the opaque's byte length; it is back-patched from
 * the xdr cursor once the ioerr and iostats arrays have been written
 * after it.  BUG_ON fires only if even 4 bytes cannot be reserved.
 */
static void
ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo,
			      struct xdr_stream *xdr,
			      const struct nfs4_layoutreturn_args *args)
{
	struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo);
	__be32 *start;

	dprintk("%s: Begin\n", __func__);
	start = xdr_reserve_space(xdr, 4);
	BUG_ON(!start);

	if (ff_layout_encode_ioerr(flo, xdr, args))
		goto out;

	ff_layout_encode_iostats(flo, xdr, args);
out:
	/* words written after the length word, converted to bytes */
	*start = cpu_to_be32((xdr->p - start - 1) * 4);
	dprintk("%s: Return\n", __func__);
}
1530 | |||
/*
 * pNFS layout driver operations for the flexfile layout type.
 * Commit bookkeeping reuses the generic pNFS bucket infrastructure;
 * only the per-DS initiation and page I/O paths are driver-specific.
 */
static struct pnfs_layoutdriver_type flexfilelayout_type = {
	.id			= LAYOUT_FLEX_FILES,
	.name			= "LAYOUT_FLEX_FILES",
	.owner			= THIS_MODULE,
	.alloc_layout_hdr	= ff_layout_alloc_layout_hdr,
	.free_layout_hdr	= ff_layout_free_layout_hdr,
	.alloc_lseg		= ff_layout_alloc_lseg,
	.free_lseg		= ff_layout_free_lseg,
	.pg_read_ops		= &ff_layout_pg_read_ops,
	.pg_write_ops		= &ff_layout_pg_write_ops,
	.get_ds_info		= ff_layout_get_ds_info,
	.free_deviceid_node	= ff_layout_free_deveiceid_node,
	.mark_request_commit	= ff_layout_mark_request_commit,
	.clear_request_commit	= pnfs_generic_clear_request_commit,
	.scan_commit_lists	= pnfs_generic_scan_commit_lists,
	.recover_commit_reqs	= pnfs_generic_recover_commit_reqs,
	.commit_pagelist	= ff_layout_commit_pagelist,
	.read_pagelist		= ff_layout_read_pagelist,
	.write_pagelist		= ff_layout_write_pagelist,
	.alloc_deviceid_node    = ff_layout_alloc_deviceid_node,
	.encode_layoutreturn    = ff_layout_encode_layoutreturn,
};
1553 | |||
/* Module init: register the flexfile driver with the pNFS core. */
static int __init nfs4flexfilelayout_init(void)
{
	printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n",
	       __func__);
	return pnfs_register_layoutdriver(&flexfilelayout_type);
}
1560 | |||
/* Module exit: unregister the flexfile driver from the pNFS core. */
static void __exit nfs4flexfilelayout_exit(void)
{
	printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n",
	       __func__);
	pnfs_unregister_layoutdriver(&flexfilelayout_type);
}
1567 | |||
/* "4" is LAYOUT_FLEX_FILES in the layouttype4 registry */
MODULE_ALIAS("nfs-layouttype4-4");

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("The NFSv4 flexfile layout driver");

module_init(nfs4flexfilelayout_init);
module_exit(nfs4flexfilelayout_exit);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h new file mode 100644 index 000000000000..070f20445b2d --- /dev/null +++ b/fs/nfs/flexfilelayout/flexfilelayout.h | |||
@@ -0,0 +1,155 @@ | |||
1 | /* | ||
2 | * NFSv4 flexfile layout driver data structures. | ||
3 | * | ||
4 | * Copyright (c) 2014, Primary Data, Inc. All rights reserved. | ||
5 | * | ||
6 | * Tao Peng <bergwolf@primarydata.com> | ||
7 | */ | ||
8 | |||
9 | #ifndef FS_NFS_NFS4FLEXFILELAYOUT_H | ||
10 | #define FS_NFS_NFS4FLEXFILELAYOUT_H | ||
11 | |||
12 | #include "../pnfs.h" | ||
13 | |||
14 | /* XXX: Let's filter out insanely large mirror count for now to avoid oom | ||
15 | * due to network error etc. */ | ||
16 | #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 | ||
17 | |||
/* One NFS version a data server can be reached with, decoded from the
 * device's GETDEVICEINFO XDR (see nfs4_ff_alloc_deviceid_node()). */
struct nfs4_ff_ds_version {
	u32 version;		/* NFS protocol major version */
	u32 minor_version;
	u32 rsize;		/* clamped to NFS_MAX_FILE_IO_SIZE at decode */
	u32 wsize;		/* clamped to NFS_MAX_FILE_IO_SIZE at decode */
	bool tightly_coupled;
};
25 | |||
26 | /* chained in global deviceid hlist */ | ||
27 | struct nfs4_ff_layout_ds { | ||
28 | struct nfs4_deviceid_node id_node; | ||
29 | u32 ds_versions_cnt; | ||
30 | struct nfs4_ff_ds_version *ds_versions; | ||
31 | struct nfs4_pnfs_ds *ds; | ||
32 | }; | ||
33 | |||
/* One recorded data-server I/O error, reported back to the MDS as an
 * ioerr entry in LAYOUTRETURN (see ff_layout_encode_ds_ioerr()). */
struct nfs4_ff_layout_ds_err {
	struct list_head list; /* linked in mirror error_list */
	u64 offset;		/* start of the failed byte range */
	u64 length;
	int status;
	enum nfs_opnum4 opnum;	/* operation that failed */
	nfs4_stateid stateid;
	struct nfs4_deviceid deviceid;
};
43 | |||
/* One mirror of a flexfile layout segment. */
struct nfs4_ff_layout_mirror {
	u32 ds_count;
	u32 efficiency;
	struct nfs4_ff_layout_ds *mirror_ds;	/* cached deviceid, may be NULL */
	u32 fh_versions_cnt;
	struct nfs_fh *fh_versions;		/* one fh per supported version */
	nfs4_stateid stateid;
	struct nfs4_string user_name;
	struct nfs4_string group_name;
	u32 uid;	/* (u32)-1 selects AUTH_NULL for the DS cred */
	u32 gid;
	struct rpc_cred *cred;	/* lazily filled by ff_layout_update_mirror_cred() */
	spinlock_t lock;	/* NOTE(review): protected fields not evident here — confirm */
};
58 | |||
/* Flexfile layout segment: generic segment plus its mirror array. */
struct nfs4_ff_layout_segment {
	struct pnfs_layout_segment generic_hdr;	/* must stay first for container_of */
	u64 stripe_unit;
	u32 mirror_array_cnt;
	struct nfs4_ff_layout_mirror **mirror_array;
};
65 | |||
/* Per-inode flexfile layout header. */
struct nfs4_flexfile_layout {
	struct pnfs_layout_hdr generic_hdr;	/* must stay first for container_of */
	struct pnfs_ds_commit_info commit_info;
	struct list_head error_list; /* nfs4_ff_layout_ds_err, under inode i_lock */
};
71 | |||
/* Up-cast a generic layout header to its flexfile container. */
static inline struct nfs4_flexfile_layout *
FF_LAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo)
{
	return container_of(lo, struct nfs4_flexfile_layout, generic_hdr);
}
77 | |||
/* Up-cast a generic layout segment to its flexfile container. */
static inline struct nfs4_ff_layout_segment *
FF_LAYOUT_LSEG(struct pnfs_layout_segment *lseg)
{
	return container_of(lseg,
			    struct nfs4_ff_layout_segment,
			    generic_hdr);
}
85 | |||
86 | static inline struct nfs4_deviceid_node * | ||
87 | FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx) | ||
88 | { | ||
89 | if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt || | ||
90 | FF_LAYOUT_LSEG(lseg)->mirror_array[idx] == NULL || | ||
91 | FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds == NULL) | ||
92 | return NULL; | ||
93 | return &FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds->id_node; | ||
94 | } | ||
95 | |||
/* Recover the nfs4_ff_layout_ds that embeds deviceid node @node. */
static inline struct nfs4_ff_layout_ds *
FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node)
{
	return container_of(node, struct nfs4_ff_layout_ds, id_node);
}
101 | |||
102 | static inline struct nfs4_ff_layout_mirror * | ||
103 | FF_LAYOUT_COMP(struct pnfs_layout_segment *lseg, u32 idx) | ||
104 | { | ||
105 | if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt) | ||
106 | return NULL; | ||
107 | return FF_LAYOUT_LSEG(lseg)->mirror_array[idx]; | ||
108 | } | ||
109 | |||
/* Number of mirrors in @lseg's mirror array. */
static inline u32
FF_LAYOUT_MIRROR_COUNT(struct pnfs_layout_segment *lseg)
{
	return FF_LAYOUT_LSEG(lseg)->mirror_array_cnt;
}
115 | |||
/* Thin wrapper: has this deviceid been marked unavailable? */
static inline bool
ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node)
{
	return nfs4_test_deviceid_unavailable(node);
}
121 | |||
/*
 * NFS protocol version of the first (currently only) version entry of
 * mirror @ds_idx.  NOTE(review): no NULL/bounds check — FF_LAYOUT_COMP()
 * can return NULL and mirror_ds may be unset; callers must guarantee a
 * valid, device-bound mirror (e.g. after nfs4_ff_layout_prepare_ds()).
 */
static inline int
nfs4_ff_layout_ds_version(struct pnfs_layout_segment *lseg, u32 ds_idx)
{
	return FF_LAYOUT_COMP(lseg, ds_idx)->mirror_ds->ds_versions[0].version;
}
127 | |||
/* Deviceid node lifetime (flexfilelayoutdev.c) */
struct nfs4_ff_layout_ds *
nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
			    gfp_t gfp_flags);
void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds);
void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds);
/* DS error tracking and LAYOUTRETURN encoding */
int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
			     struct nfs4_ff_layout_mirror *mirror, u64 offset,
			     u64 length, int status, enum nfs_opnum4 opnum,
			     gfp_t gfp_flags);
int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo,
			      struct xdr_stream *xdr, int *count,
			      const struct pnfs_layout_range *range);
/* Data-server selection, connection and credentials */
struct nfs_fh *
nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx);

struct nfs4_pnfs_ds *
nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
			  bool fail_return);

struct rpc_clnt *
nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg,
				 u32 ds_idx,
				 struct nfs_client *ds_clp,
				 struct inode *inode);
struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg,
				       u32 ds_idx, struct rpc_cred *mdscred);
bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);
#endif /* FS_NFS_NFS4FLEXFILELAYOUT_H */
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c new file mode 100644 index 000000000000..e2c01f204a95 --- /dev/null +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c | |||
@@ -0,0 +1,552 @@ | |||
1 | /* | ||
2 | * Device operations for the pnfs nfs4 file layout driver. | ||
3 | * | ||
4 | * Copyright (c) 2014, Primary Data, Inc. All rights reserved. | ||
5 | * | ||
6 | * Tao Peng <bergwolf@primarydata.com> | ||
7 | */ | ||
8 | |||
9 | #include <linux/nfs_fs.h> | ||
10 | #include <linux/vmalloc.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/sunrpc/addr.h> | ||
13 | |||
14 | #include "../internal.h" | ||
15 | #include "../nfs4session.h" | ||
16 | #include "flexfilelayout.h" | ||
17 | |||
18 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
19 | |||
/* DS RPC timeout/retransmit tunables, exposed as module parameters
 * (registered at the bottom of this file). */
static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
22 | |||
23 | void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) | ||
24 | { | ||
25 | if (mirror_ds) | ||
26 | nfs4_put_deviceid_node(&mirror_ds->id_node); | ||
27 | } | ||
28 | |||
/*
 * Final destructor for a flexfile deviceid: log it, drop the cached
 * data-server reference and free the container.
 * NOTE(review): ds_versions is not freed here — confirm it is released
 * elsewhere or this is a (small, per-device) leak.
 */
void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
{
	nfs4_print_deviceid(&mirror_ds->id_node.deviceid);
	nfs4_pnfs_ds_put(mirror_ds->ds);
	kfree(mirror_ds);
}
35 | |||
36 | /* Decode opaque device data and construct new_ds using it */ | ||
37 | struct nfs4_ff_layout_ds * | ||
38 | nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | ||
39 | gfp_t gfp_flags) | ||
40 | { | ||
41 | struct xdr_stream stream; | ||
42 | struct xdr_buf buf; | ||
43 | struct page *scratch; | ||
44 | struct list_head dsaddrs; | ||
45 | struct nfs4_pnfs_ds_addr *da; | ||
46 | struct nfs4_ff_layout_ds *new_ds = NULL; | ||
47 | struct nfs4_ff_ds_version *ds_versions = NULL; | ||
48 | u32 mp_count; | ||
49 | u32 version_count; | ||
50 | __be32 *p; | ||
51 | int i, ret = -ENOMEM; | ||
52 | |||
53 | /* set up xdr stream */ | ||
54 | scratch = alloc_page(gfp_flags); | ||
55 | if (!scratch) | ||
56 | goto out_err; | ||
57 | |||
58 | new_ds = kzalloc(sizeof(struct nfs4_ff_layout_ds), gfp_flags); | ||
59 | if (!new_ds) | ||
60 | goto out_scratch; | ||
61 | |||
62 | nfs4_init_deviceid_node(&new_ds->id_node, | ||
63 | server, | ||
64 | &pdev->dev_id); | ||
65 | INIT_LIST_HEAD(&dsaddrs); | ||
66 | |||
67 | xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); | ||
68 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
69 | |||
70 | /* multipath count */ | ||
71 | p = xdr_inline_decode(&stream, 4); | ||
72 | if (unlikely(!p)) | ||
73 | goto out_err_drain_dsaddrs; | ||
74 | mp_count = be32_to_cpup(p); | ||
75 | dprintk("%s: multipath ds count %d\n", __func__, mp_count); | ||
76 | |||
77 | for (i = 0; i < mp_count; i++) { | ||
78 | /* multipath ds */ | ||
79 | da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, | ||
80 | &stream, gfp_flags); | ||
81 | if (da) | ||
82 | list_add_tail(&da->da_node, &dsaddrs); | ||
83 | } | ||
84 | if (list_empty(&dsaddrs)) { | ||
85 | dprintk("%s: no suitable DS addresses found\n", | ||
86 | __func__); | ||
87 | ret = -ENOMEDIUM; | ||
88 | goto out_err_drain_dsaddrs; | ||
89 | } | ||
90 | |||
91 | /* version count */ | ||
92 | p = xdr_inline_decode(&stream, 4); | ||
93 | if (unlikely(!p)) | ||
94 | goto out_err_drain_dsaddrs; | ||
95 | version_count = be32_to_cpup(p); | ||
96 | dprintk("%s: version count %d\n", __func__, version_count); | ||
97 | |||
98 | ds_versions = kzalloc(version_count * sizeof(struct nfs4_ff_ds_version), | ||
99 | gfp_flags); | ||
100 | if (!ds_versions) | ||
101 | goto out_scratch; | ||
102 | |||
103 | for (i = 0; i < version_count; i++) { | ||
104 | /* 20 = version(4) + minor_version(4) + rsize(4) + wsize(4) + | ||
105 | * tightly_coupled(4) */ | ||
106 | p = xdr_inline_decode(&stream, 20); | ||
107 | if (unlikely(!p)) | ||
108 | goto out_err_drain_dsaddrs; | ||
109 | ds_versions[i].version = be32_to_cpup(p++); | ||
110 | ds_versions[i].minor_version = be32_to_cpup(p++); | ||
111 | ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL); | ||
112 | ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL); | ||
113 | ds_versions[i].tightly_coupled = be32_to_cpup(p); | ||
114 | |||
115 | if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE) | ||
116 | ds_versions[i].rsize = NFS_MAX_FILE_IO_SIZE; | ||
117 | if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE) | ||
118 | ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE; | ||
119 | |||
120 | if (ds_versions[i].version != 3 || ds_versions[i].minor_version != 0) { | ||
121 | dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__, | ||
122 | i, ds_versions[i].version, | ||
123 | ds_versions[i].minor_version); | ||
124 | ret = -EPROTONOSUPPORT; | ||
125 | goto out_err_drain_dsaddrs; | ||
126 | } | ||
127 | |||
128 | dprintk("%s: [%d] vers %u minor_ver %u rsize %u wsize %u coupled %d\n", | ||
129 | __func__, i, ds_versions[i].version, | ||
130 | ds_versions[i].minor_version, | ||
131 | ds_versions[i].rsize, | ||
132 | ds_versions[i].wsize, | ||
133 | ds_versions[i].tightly_coupled); | ||
134 | } | ||
135 | |||
136 | new_ds->ds_versions = ds_versions; | ||
137 | new_ds->ds_versions_cnt = version_count; | ||
138 | |||
139 | new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); | ||
140 | if (!new_ds->ds) | ||
141 | goto out_err_drain_dsaddrs; | ||
142 | |||
143 | /* If DS was already in cache, free ds addrs */ | ||
144 | while (!list_empty(&dsaddrs)) { | ||
145 | da = list_first_entry(&dsaddrs, | ||
146 | struct nfs4_pnfs_ds_addr, | ||
147 | da_node); | ||
148 | list_del_init(&da->da_node); | ||
149 | kfree(da->da_remotestr); | ||
150 | kfree(da); | ||
151 | } | ||
152 | |||
153 | __free_page(scratch); | ||
154 | return new_ds; | ||
155 | |||
156 | out_err_drain_dsaddrs: | ||
157 | while (!list_empty(&dsaddrs)) { | ||
158 | da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, | ||
159 | da_node); | ||
160 | list_del_init(&da->da_node); | ||
161 | kfree(da->da_remotestr); | ||
162 | kfree(da); | ||
163 | } | ||
164 | |||
165 | kfree(ds_versions); | ||
166 | out_scratch: | ||
167 | __free_page(scratch); | ||
168 | out_err: | ||
169 | kfree(new_ds); | ||
170 | |||
171 | dprintk("%s ERROR: returning %d\n", __func__, ret); | ||
172 | return NULL; | ||
173 | } | ||
174 | |||
175 | static u64 | ||
176 | end_offset(u64 start, u64 len) | ||
177 | { | ||
178 | u64 end; | ||
179 | |||
180 | end = start + len; | ||
181 | return end >= start ? end : NFS4_MAX_UINT64; | ||
182 | } | ||
183 | |||
184 | static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, | ||
185 | u64 offset, u64 length) | ||
186 | { | ||
187 | u64 end; | ||
188 | |||
189 | end = max_t(u64, end_offset(err->offset, err->length), | ||
190 | end_offset(offset, length)); | ||
191 | err->offset = min_t(u64, err->offset, offset); | ||
192 | err->length = end - err->offset; | ||
193 | } | ||
194 | |||
195 | static bool ds_error_can_merge(struct nfs4_ff_layout_ds_err *err, u64 offset, | ||
196 | u64 length, int status, enum nfs_opnum4 opnum, | ||
197 | nfs4_stateid *stateid, | ||
198 | struct nfs4_deviceid *deviceid) | ||
199 | { | ||
200 | return err->status == status && err->opnum == opnum && | ||
201 | nfs4_stateid_match(&err->stateid, stateid) && | ||
202 | !memcmp(&err->deviceid, deviceid, sizeof(*deviceid)) && | ||
203 | end_offset(err->offset, err->length) >= offset && | ||
204 | err->offset <= end_offset(offset, length); | ||
205 | } | ||
206 | |||
207 | static bool merge_ds_error(struct nfs4_ff_layout_ds_err *old, | ||
208 | struct nfs4_ff_layout_ds_err *new) | ||
209 | { | ||
210 | if (!ds_error_can_merge(old, new->offset, new->length, new->status, | ||
211 | new->opnum, &new->stateid, &new->deviceid)) | ||
212 | return false; | ||
213 | |||
214 | extend_ds_error(old, new->offset, new->length); | ||
215 | return true; | ||
216 | } | ||
217 | |||
/*
 * Merge @dserr into an existing entry on flo->error_list, or else link
 * it into the list.  Caller must hold the inode's i_lock.
 *
 * Returns true when @dserr was merged — ownership stays with the
 * caller, who must free it — and false when @dserr was inserted.
 */
static bool
ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo,
			      struct nfs4_ff_layout_ds_err *dserr)
{
	struct nfs4_ff_layout_ds_err *err;

	list_for_each_entry(err, &flo->error_list, list) {
		if (merge_ds_error(err, dserr)) {
			return true;
		}
	}

	list_add(&dserr->list, &flo->error_list);
	return false;
}
233 | |||
234 | static bool | ||
235 | ff_layout_update_ds_error(struct nfs4_flexfile_layout *flo, u64 offset, | ||
236 | u64 length, int status, enum nfs_opnum4 opnum, | ||
237 | nfs4_stateid *stateid, struct nfs4_deviceid *deviceid) | ||
238 | { | ||
239 | bool found = false; | ||
240 | struct nfs4_ff_layout_ds_err *err; | ||
241 | |||
242 | list_for_each_entry(err, &flo->error_list, list) { | ||
243 | if (ds_error_can_merge(err, offset, length, status, opnum, | ||
244 | stateid, deviceid)) { | ||
245 | found = true; | ||
246 | extend_ds_error(err, offset, length); | ||
247 | break; | ||
248 | } | ||
249 | } | ||
250 | |||
251 | return found; | ||
252 | } | ||
253 | |||
/*
 * ff_layout_track_ds_error - record a DS I/O error for later
 * LAYOUTRETURN reporting.
 *
 * First tries to widen an existing entry under the inode's i_lock.  If
 * that fails, allocates a new entry outside the lock and inserts it,
 * re-attempting the merge under the lock — a racing thread may have
 * added a mergeable entry in the window, in which case the fresh
 * allocation is freed.  Returns 0, -EINVAL when the mirror has no
 * deviceid, or -ENOMEM.
 */
int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
			     struct nfs4_ff_layout_mirror *mirror, u64 offset,
			     u64 length, int status, enum nfs_opnum4 opnum,
			     gfp_t gfp_flags)
{
	struct nfs4_ff_layout_ds_err *dserr;
	bool needfree;

	/* success: nothing to track */
	if (status == 0)
		return 0;

	if (mirror->mirror_ds == NULL)
		return -EINVAL;

	spin_lock(&flo->generic_hdr.plh_inode->i_lock);
	if (ff_layout_update_ds_error(flo, offset, length, status, opnum,
				      &mirror->stateid,
				      &mirror->mirror_ds->id_node.deviceid)) {
		spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
		return 0;
	}
	spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
	dserr = kmalloc(sizeof(*dserr), gfp_flags);
	if (!dserr)
		return -ENOMEM;

	INIT_LIST_HEAD(&dserr->list);
	dserr->offset = offset;
	dserr->length = length;
	dserr->status = status;
	dserr->opnum = opnum;
	nfs4_stateid_copy(&dserr->stateid, &mirror->stateid);
	memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid,
	       NFS4_DEVICEID4_SIZE);

	/* re-check under the lock; true means a racer merged for us */
	spin_lock(&flo->generic_hdr.plh_inode->i_lock);
	needfree = ff_layout_add_ds_error_locked(flo, dserr);
	spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
	if (needfree)
		kfree(dserr);

	return 0;
}
297 | |||
298 | /* currently we only support AUTH_NONE and AUTH_SYS */ | ||
299 | static rpc_authflavor_t | ||
300 | nfs4_ff_layout_choose_authflavor(struct nfs4_ff_layout_mirror *mirror) | ||
301 | { | ||
302 | if (mirror->uid == (u32)-1) | ||
303 | return RPC_AUTH_NULL; | ||
304 | return RPC_AUTH_UNIX; | ||
305 | } | ||
306 | |||
/* fetch cred for NFSv3 DS */
/*
 * Lazily look up and cache the RPC credential for a v3 data server,
 * built from the uid/gid the MDS supplied in the layout.  No-op when
 * the DS is not connected, a cred is already cached, or the DS is not
 * v3.  Returns 0 or the PTR_ERR value from the credcache lookup.
 *
 * NOTE(review): mirror->cred is tested and assigned without taking
 * mirror->lock; a race would at worst overwrite/leak one lookup —
 * confirm callers serialize or that this is acceptable.
 */
static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror,
					struct nfs4_pnfs_ds *ds)
{
	if (ds->ds_clp && !mirror->cred &&
	    mirror->mirror_ds->ds_versions[0].version == 3) {
		struct rpc_auth *auth = ds->ds_clp->cl_rpcclient->cl_auth;
		struct rpc_cred *cred;
		struct auth_cred acred = {
			.uid = make_kuid(&init_user_ns, mirror->uid),
			.gid = make_kgid(&init_user_ns, mirror->gid),
		};

		/* AUTH_NULL ignores acred */
		cred = auth->au_ops->lookup_cred(auth, &acred, 0);
		if (IS_ERR(cred)) {
			dprintk("%s: lookup_cred failed with %ld\n",
				__func__, PTR_ERR(cred));
			return PTR_ERR(cred);
		} else {
			mirror->cred = cred;
		}
	}
	return 0;
}
332 | |||
/*
 * nfs4_ff_layout_select_ds_fh - file handle for I/O through a mirror.
 *
 * Returns the mirror's first advertised fh version, or NULL when the
 * mirror has no usable data server — in which case the deviceid (if
 * any) is poisoned so the layout stops being used.
 */
struct nfs_fh *
nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx)
{
	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx);
	struct nfs_fh *fh = NULL;
	struct nfs4_deviceid_node *devid;

	if (mirror == NULL || mirror->mirror_ds == NULL ||
	    mirror->mirror_ds->ds == NULL) {
		printk(KERN_ERR "NFS: %s: No data server for mirror offset index %d\n",
			__func__, mirror_idx);
		if (mirror && mirror->mirror_ds) {
			devid = &mirror->mirror_ds->id_node;
			pnfs_generic_mark_devid_invalid(devid);
		}
		goto out;
	}

	/* FIXME: For now assume there is only 1 version available for the DS */
	fh = &mirror->fh_versions[0];
out:
	return fh;
}
356 | |||
/* Upon return, either ds is connected, or ds is NULL */
/*
 * nfs4_ff_layout_prepare_ds - ensure the DS for mirror @ds_idx is
 * connected and has a credential cached.
 *
 * On connect failure the error is recorded for LAYOUTRETURN and the
 * layout is marked for return; whether layoutget is retried depends on
 * @fail_return and on whether any other mirror is still reachable.
 */
struct nfs4_pnfs_ds *
nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
			  bool fail_return)
{
	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
	struct nfs4_pnfs_ds *ds = NULL;
	struct nfs4_deviceid_node *devid;
	struct inode *ino = lseg->pls_layout->plh_inode;
	struct nfs_server *s = NFS_SERVER(ino);
	unsigned int max_payload;
	rpc_authflavor_t flavor;

	/* No cached DS for this mirror: poison the deviceid (if any) so
	 * the layout stops being used. */
	if (mirror == NULL || mirror->mirror_ds == NULL ||
	    mirror->mirror_ds->ds == NULL) {
		printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
			__func__, ds_idx);
		if (mirror && mirror->mirror_ds) {
			devid = &mirror->mirror_ds->id_node;
			pnfs_generic_mark_devid_invalid(devid);
		}
		goto out;
	}

	devid = &mirror->mirror_ds->id_node;
	if (ff_layout_test_devid_unavailable(devid))
		goto out;

	ds = mirror->mirror_ds->ds;
	/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
	smp_rmb();
	if (ds->ds_clp)
		goto out;

	flavor = nfs4_ff_layout_choose_authflavor(mirror);

	/* FIXME: For now we assume the server sent only one version of NFS
	 * to use for the DS.
	 */
	nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
			     dataserver_retrans,
			     mirror->mirror_ds->ds_versions[0].version,
			     mirror->mirror_ds->ds_versions[0].minor_version,
			     flavor);

	/* connect success, check rsize/wsize limit */
	if (ds->ds_clp) {
		max_payload =
			nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
				       NULL);
		if (mirror->mirror_ds->ds_versions[0].rsize > max_payload)
			mirror->mirror_ds->ds_versions[0].rsize = max_payload;
		if (mirror->mirror_ds->ds_versions[0].wsize > max_payload)
			mirror->mirror_ds->ds_versions[0].wsize = max_payload;
	} else {
		/* Connect failed: record the error, then decide between
		 * retrying layoutget and returning the layout, based on
		 * whether other mirrors remain usable. */
		ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
					 mirror, lseg->pls_range.offset,
					 lseg->pls_range.length, NFS4ERR_NXIO,
					 OP_ILLEGAL, GFP_NOIO);
		if (fail_return) {
			pnfs_error_mark_layout_for_return(ino, lseg);
			if (ff_layout_has_available_ds(lseg))
				pnfs_set_retry_layoutget(lseg->pls_layout);
			else
				pnfs_clear_retry_layoutget(lseg->pls_layout);

		} else {
			if (ff_layout_has_available_ds(lseg))
				set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
					&lseg->pls_layout->plh_flags);
			else {
				pnfs_error_mark_layout_for_return(ino, lseg);
				pnfs_clear_retry_layoutget(lseg->pls_layout);
			}
		}
	}

	/* a cred lookup failure makes the DS unusable for this caller */
	if (ff_layout_update_mirror_cred(mirror, ds))
		ds = NULL;
out:
	return ds;
}
439 | |||
440 | struct rpc_cred * | ||
441 | ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, | ||
442 | struct rpc_cred *mdscred) | ||
443 | { | ||
444 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | ||
445 | struct rpc_cred *cred = ERR_PTR(-EINVAL); | ||
446 | |||
447 | if (!nfs4_ff_layout_prepare_ds(lseg, ds_idx, true)) | ||
448 | goto out; | ||
449 | |||
450 | if (mirror && mirror->cred) | ||
451 | cred = mirror->cred; | ||
452 | else | ||
453 | cred = mdscred; | ||
454 | out: | ||
455 | return cred; | ||
456 | } | ||
457 | |||
458 | /** | ||
459 | * Find or create a DS rpc client with th MDS server rpc client auth flavor | ||
460 | * in the nfs_client cl_ds_clients list. | ||
461 | */ | ||
462 | struct rpc_clnt * | ||
463 | nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx, | ||
464 | struct nfs_client *ds_clp, struct inode *inode) | ||
465 | { | ||
466 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | ||
467 | |||
468 | switch (mirror->mirror_ds->ds_versions[0].version) { | ||
469 | case 3: | ||
470 | /* For NFSv3 DS, flavor is set when creating DS connections */ | ||
471 | return ds_clp->cl_rpcclient; | ||
472 | case 4: | ||
473 | return nfs4_find_or_create_ds_client(ds_clp, inode); | ||
474 | default: | ||
475 | BUG(); | ||
476 | } | ||
477 | } | ||
478 | |||
479 | static bool is_range_intersecting(u64 offset1, u64 length1, | ||
480 | u64 offset2, u64 length2) | ||
481 | { | ||
482 | u64 end1 = end_offset(offset1, length1); | ||
483 | u64 end2 = end_offset(offset2, length2); | ||
484 | |||
485 | return (end1 == NFS4_MAX_UINT64 || end1 > offset2) && | ||
486 | (end2 == NFS4_MAX_UINT64 || end2 > offset1); | ||
487 | } | ||
488 | |||
/* called with inode i_lock held */
/*
 * XDR-encode every tracked DS error intersecting @range, incrementing
 * *@count per entry.  Encoding is destructive: each encoded entry is
 * unlinked and freed.  Returns 0, or -ENOBUFS when the xdr stream runs
 * out of space (remaining entries stay queued for a later return).
 */
int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo,
			      struct xdr_stream *xdr, int *count,
			      const struct pnfs_layout_range *range)
{
	struct nfs4_ff_layout_ds_err *err, *n;
	__be32 *p;

	list_for_each_entry_safe(err, n, &flo->error_list, list) {
		if (!is_range_intersecting(err->offset, err->length,
					   range->offset, range->length))
			continue;
		/* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE)
		 * + deviceid(NFS4_DEVICEID4_SIZE) + status(4) + opnum(4)
		 */
		p = xdr_reserve_space(xdr,
				24 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE);
		if (unlikely(!p))
			return -ENOBUFS;
		p = xdr_encode_hyper(p, err->offset);
		p = xdr_encode_hyper(p, err->length);
		p = xdr_encode_opaque_fixed(p, &err->stateid,
					    NFS4_STATEID_SIZE);
		p = xdr_encode_opaque_fixed(p, &err->deviceid,
					    NFS4_DEVICEID4_SIZE);
		*p++ = cpu_to_be32(err->status);
		*p++ = cpu_to_be32(err->opnum);
		*count += 1;
		list_del(&err->list);
		dprintk("%s: offset %llu length %llu status %d op %d count %d\n",
			__func__, err->offset, err->length, err->status,
			err->opnum, *count);
		kfree(err);
	}

	return 0;
}
526 | |||
527 | bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) | ||
528 | { | ||
529 | struct nfs4_ff_layout_mirror *mirror; | ||
530 | struct nfs4_deviceid_node *devid; | ||
531 | int idx; | ||
532 | |||
533 | for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { | ||
534 | mirror = FF_LAYOUT_COMP(lseg, idx); | ||
535 | if (mirror && mirror->mirror_ds) { | ||
536 | devid = &mirror->mirror_ds->id_node; | ||
537 | if (!ff_layout_test_devid_unavailable(devid)) | ||
538 | return true; | ||
539 | } | ||
540 | } | ||
541 | |||
542 | return false; | ||
543 | } | ||
544 | |||
/* Expose the DS timeout/retransmit statics as runtime-tunable module
 * parameters (mode 0644: root-writable via /sys/module). */
module_param(dataserver_retrans, uint, 0644);
MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client "
			"retries a request before it attempts further "
			" recovery  action.");
module_param(dataserver_timeo, uint, 0644);
MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
			"NFSv4.1  client  waits for a response from a "
			" data server before it retries an NFS request.");
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 2f5db844c172..857e2a99acc8 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c | |||
@@ -152,7 +152,7 @@ void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *f | |||
152 | nfs_fattr_free_group_name(fattr); | 152 | nfs_fattr_free_group_name(fattr); |
153 | } | 153 | } |
154 | 154 | ||
155 | static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) | 155 | int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) |
156 | { | 156 | { |
157 | unsigned long val; | 157 | unsigned long val; |
158 | char buf[16]; | 158 | char buf[16]; |
@@ -166,6 +166,7 @@ static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *re | |||
166 | *res = val; | 166 | *res = val; |
167 | return 1; | 167 | return 1; |
168 | } | 168 | } |
169 | EXPORT_SYMBOL_GPL(nfs_map_string_to_numeric); | ||
169 | 170 | ||
170 | static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) | 171 | static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) |
171 | { | 172 | { |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2211f6ba8736..e4f0dcef8f54 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -388,7 +388,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st | |||
388 | if (S_ISREG(inode->i_mode)) { | 388 | if (S_ISREG(inode->i_mode)) { |
389 | inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; | 389 | inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; |
390 | inode->i_data.a_ops = &nfs_file_aops; | 390 | inode->i_data.a_ops = &nfs_file_aops; |
391 | inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; | ||
392 | } else if (S_ISDIR(inode->i_mode)) { | 391 | } else if (S_ISDIR(inode->i_mode)) { |
393 | inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; | 392 | inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; |
394 | inode->i_fop = &nfs_dir_operations; | 393 | inode->i_fop = &nfs_dir_operations; |
@@ -507,10 +506,15 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
507 | attr->ia_valid &= ~ATTR_MODE; | 506 | attr->ia_valid &= ~ATTR_MODE; |
508 | 507 | ||
509 | if (attr->ia_valid & ATTR_SIZE) { | 508 | if (attr->ia_valid & ATTR_SIZE) { |
509 | loff_t i_size; | ||
510 | |||
510 | BUG_ON(!S_ISREG(inode->i_mode)); | 511 | BUG_ON(!S_ISREG(inode->i_mode)); |
511 | 512 | ||
512 | if (attr->ia_size == i_size_read(inode)) | 513 | i_size = i_size_read(inode); |
514 | if (attr->ia_size == i_size) | ||
513 | attr->ia_valid &= ~ATTR_SIZE; | 515 | attr->ia_valid &= ~ATTR_SIZE; |
516 | else if (attr->ia_size < i_size && IS_SWAPFILE(inode)) | ||
517 | return -ETXTBSY; | ||
514 | } | 518 | } |
515 | 519 | ||
516 | /* Optimization: if the end result is no change, don't RPC */ | 520 | /* Optimization: if the end result is no change, don't RPC */ |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b6f34bfa6fe8..212b8c883d22 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/mount.h> | 6 | #include <linux/mount.h> |
7 | #include <linux/security.h> | 7 | #include <linux/security.h> |
8 | #include <linux/crc32.h> | 8 | #include <linux/crc32.h> |
9 | #include <linux/nfs_page.h> | ||
9 | 10 | ||
10 | #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) | 11 | #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) |
11 | 12 | ||
@@ -187,9 +188,15 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, | |||
187 | const struct sockaddr *ds_addr, | 188 | const struct sockaddr *ds_addr, |
188 | int ds_addrlen, int ds_proto, | 189 | int ds_addrlen, int ds_proto, |
189 | unsigned int ds_timeo, | 190 | unsigned int ds_timeo, |
190 | unsigned int ds_retrans); | 191 | unsigned int ds_retrans, |
192 | u32 minor_version, | ||
193 | rpc_authflavor_t au_flavor); | ||
191 | extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, | 194 | extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, |
192 | struct inode *); | 195 | struct inode *); |
196 | extern struct nfs_client *nfs3_set_ds_client(struct nfs_client *mds_clp, | ||
197 | const struct sockaddr *ds_addr, int ds_addrlen, | ||
198 | int ds_proto, unsigned int ds_timeo, | ||
199 | unsigned int ds_retrans, rpc_authflavor_t au_flavor); | ||
193 | #ifdef CONFIG_PROC_FS | 200 | #ifdef CONFIG_PROC_FS |
194 | extern int __init nfs_fs_proc_init(void); | 201 | extern int __init nfs_fs_proc_init(void); |
195 | extern void nfs_fs_proc_exit(void); | 202 | extern void nfs_fs_proc_exit(void); |
@@ -242,9 +249,12 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); | |||
242 | void nfs_pgio_header_free(struct nfs_pgio_header *); | 249 | void nfs_pgio_header_free(struct nfs_pgio_header *); |
243 | void nfs_pgio_data_destroy(struct nfs_pgio_header *); | 250 | void nfs_pgio_data_destroy(struct nfs_pgio_header *); |
244 | int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); | 251 | int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); |
245 | int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *, | 252 | int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, |
246 | const struct rpc_call_ops *, int, int); | 253 | struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops, |
254 | const struct rpc_call_ops *call_ops, int how, int flags); | ||
247 | void nfs_free_request(struct nfs_page *req); | 255 | void nfs_free_request(struct nfs_page *req); |
256 | struct nfs_pgio_mirror * | ||
257 | nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc); | ||
248 | 258 | ||
249 | static inline void nfs_iocounter_init(struct nfs_io_counter *c) | 259 | static inline void nfs_iocounter_init(struct nfs_io_counter *c) |
250 | { | 260 | { |
@@ -252,6 +262,12 @@ static inline void nfs_iocounter_init(struct nfs_io_counter *c) | |||
252 | atomic_set(&c->io_count, 0); | 262 | atomic_set(&c->io_count, 0); |
253 | } | 263 | } |
254 | 264 | ||
265 | static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc) | ||
266 | { | ||
267 | WARN_ON_ONCE(desc->pg_mirror_count < 1); | ||
268 | return desc->pg_mirror_count > 1; | ||
269 | } | ||
270 | |||
255 | /* nfs2xdr.c */ | 271 | /* nfs2xdr.c */ |
256 | extern struct rpc_procinfo nfs_procedures[]; | 272 | extern struct rpc_procinfo nfs_procedures[]; |
257 | extern int nfs2_decode_dirent(struct xdr_stream *, | 273 | extern int nfs2_decode_dirent(struct xdr_stream *, |
@@ -375,7 +391,7 @@ extern struct rpc_stat nfs_rpcstat; | |||
375 | 391 | ||
376 | extern int __init register_nfs_fs(void); | 392 | extern int __init register_nfs_fs(void); |
377 | extern void __exit unregister_nfs_fs(void); | 393 | extern void __exit unregister_nfs_fs(void); |
378 | extern void nfs_sb_active(struct super_block *sb); | 394 | extern bool nfs_sb_active(struct super_block *sb); |
379 | extern void nfs_sb_deactive(struct super_block *sb); | 395 | extern void nfs_sb_deactive(struct super_block *sb); |
380 | 396 | ||
381 | /* namespace.c */ | 397 | /* namespace.c */ |
@@ -414,7 +430,6 @@ int nfs_show_options(struct seq_file *, struct dentry *); | |||
414 | int nfs_show_devname(struct seq_file *, struct dentry *); | 430 | int nfs_show_devname(struct seq_file *, struct dentry *); |
415 | int nfs_show_path(struct seq_file *, struct dentry *); | 431 | int nfs_show_path(struct seq_file *, struct dentry *); |
416 | int nfs_show_stats(struct seq_file *, struct dentry *); | 432 | int nfs_show_stats(struct seq_file *, struct dentry *); |
417 | void nfs_put_super(struct super_block *); | ||
418 | int nfs_remount(struct super_block *sb, int *flags, char *raw_data); | 433 | int nfs_remount(struct super_block *sb, int *flags, char *raw_data); |
419 | 434 | ||
420 | /* write.c */ | 435 | /* write.c */ |
@@ -427,6 +442,7 @@ extern void nfs_write_prepare(struct rpc_task *task, void *calldata); | |||
427 | extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); | 442 | extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); |
428 | extern int nfs_initiate_commit(struct rpc_clnt *clnt, | 443 | extern int nfs_initiate_commit(struct rpc_clnt *clnt, |
429 | struct nfs_commit_data *data, | 444 | struct nfs_commit_data *data, |
445 | const struct nfs_rpc_ops *nfs_ops, | ||
430 | const struct rpc_call_ops *call_ops, | 446 | const struct rpc_call_ops *call_ops, |
431 | int how, int flags); | 447 | int how, int flags); |
432 | extern void nfs_init_commit(struct nfs_commit_data *data, | 448 | extern void nfs_init_commit(struct nfs_commit_data *data, |
@@ -440,13 +456,15 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst, | |||
440 | struct nfs_commit_info *cinfo); | 456 | struct nfs_commit_info *cinfo); |
441 | void nfs_mark_request_commit(struct nfs_page *req, | 457 | void nfs_mark_request_commit(struct nfs_page *req, |
442 | struct pnfs_layout_segment *lseg, | 458 | struct pnfs_layout_segment *lseg, |
443 | struct nfs_commit_info *cinfo); | 459 | struct nfs_commit_info *cinfo, |
460 | u32 ds_commit_idx); | ||
444 | int nfs_write_need_commit(struct nfs_pgio_header *); | 461 | int nfs_write_need_commit(struct nfs_pgio_header *); |
445 | int nfs_generic_commit_list(struct inode *inode, struct list_head *head, | 462 | int nfs_generic_commit_list(struct inode *inode, struct list_head *head, |
446 | int how, struct nfs_commit_info *cinfo); | 463 | int how, struct nfs_commit_info *cinfo); |
447 | void nfs_retry_commit(struct list_head *page_list, | 464 | void nfs_retry_commit(struct list_head *page_list, |
448 | struct pnfs_layout_segment *lseg, | 465 | struct pnfs_layout_segment *lseg, |
449 | struct nfs_commit_info *cinfo); | 466 | struct nfs_commit_info *cinfo, |
467 | u32 ds_commit_idx); | ||
450 | void nfs_commitdata_release(struct nfs_commit_data *data); | 468 | void nfs_commitdata_release(struct nfs_commit_data *data); |
451 | void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, | 469 | void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, |
452 | struct nfs_commit_info *cinfo); | 470 | struct nfs_commit_info *cinfo); |
@@ -457,6 +475,7 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo, | |||
457 | struct nfs_direct_req *dreq); | 475 | struct nfs_direct_req *dreq); |
458 | int nfs_key_timeout_notify(struct file *filp, struct inode *inode); | 476 | int nfs_key_timeout_notify(struct file *filp, struct inode *inode); |
459 | bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); | 477 | bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); |
478 | void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio); | ||
460 | 479 | ||
461 | #ifdef CONFIG_MIGRATION | 480 | #ifdef CONFIG_MIGRATION |
462 | extern int nfs_migrate_page(struct address_space *, | 481 | extern int nfs_migrate_page(struct address_space *, |
@@ -480,6 +499,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) | |||
480 | inode_dio_wait(inode); | 499 | inode_dio_wait(inode); |
481 | } | 500 | } |
482 | extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); | 501 | extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); |
502 | extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq); | ||
483 | 503 | ||
484 | /* nfs4proc.c */ | 504 | /* nfs4proc.c */ |
485 | extern void __nfs4_read_done_cb(struct nfs_pgio_header *); | 505 | extern void __nfs4_read_done_cb(struct nfs_pgio_header *); |
@@ -493,6 +513,26 @@ extern int nfs41_walk_client_list(struct nfs_client *clp, | |||
493 | struct nfs_client **result, | 513 | struct nfs_client **result, |
494 | struct rpc_cred *cred); | 514 | struct rpc_cred *cred); |
495 | 515 | ||
516 | static inline struct inode *nfs_igrab_and_active(struct inode *inode) | ||
517 | { | ||
518 | inode = igrab(inode); | ||
519 | if (inode != NULL && !nfs_sb_active(inode->i_sb)) { | ||
520 | iput(inode); | ||
521 | inode = NULL; | ||
522 | } | ||
523 | return inode; | ||
524 | } | ||
525 | |||
526 | static inline void nfs_iput_and_deactive(struct inode *inode) | ||
527 | { | ||
528 | if (inode != NULL) { | ||
529 | struct super_block *sb = inode->i_sb; | ||
530 | |||
531 | iput(inode); | ||
532 | nfs_sb_deactive(sb); | ||
533 | } | ||
534 | } | ||
535 | |||
496 | /* | 536 | /* |
497 | * Determine the device name as a string | 537 | * Determine the device name as a string |
498 | */ | 538 | */ |
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 5f61b83f4a1c..b4e03ed8599d 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c | |||
@@ -481,7 +481,8 @@ out_overflow: | |||
481 | * void; | 481 | * void; |
482 | * }; | 482 | * }; |
483 | */ | 483 | */ |
484 | static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result) | 484 | static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result, |
485 | __u32 *op_status) | ||
485 | { | 486 | { |
486 | enum nfs_stat status; | 487 | enum nfs_stat status; |
487 | int error; | 488 | int error; |
@@ -489,6 +490,8 @@ static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result) | |||
489 | error = decode_stat(xdr, &status); | 490 | error = decode_stat(xdr, &status); |
490 | if (unlikely(error)) | 491 | if (unlikely(error)) |
491 | goto out; | 492 | goto out; |
493 | if (op_status) | ||
494 | *op_status = status; | ||
492 | if (status != NFS_OK) | 495 | if (status != NFS_OK) |
493 | goto out_default; | 496 | goto out_default; |
494 | error = decode_fattr(xdr, result); | 497 | error = decode_fattr(xdr, result); |
@@ -808,7 +811,7 @@ out_default: | |||
808 | static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr, | 811 | static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr, |
809 | struct nfs_fattr *result) | 812 | struct nfs_fattr *result) |
810 | { | 813 | { |
811 | return decode_attrstat(xdr, result); | 814 | return decode_attrstat(xdr, result, NULL); |
812 | } | 815 | } |
813 | 816 | ||
814 | static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr, | 817 | static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr, |
@@ -865,6 +868,7 @@ static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
865 | error = decode_stat(xdr, &status); | 868 | error = decode_stat(xdr, &status); |
866 | if (unlikely(error)) | 869 | if (unlikely(error)) |
867 | goto out; | 870 | goto out; |
871 | result->op_status = status; | ||
868 | if (status != NFS_OK) | 872 | if (status != NFS_OK) |
869 | goto out_default; | 873 | goto out_default; |
870 | error = decode_fattr(xdr, result->fattr); | 874 | error = decode_fattr(xdr, result->fattr); |
@@ -882,7 +886,7 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
882 | { | 886 | { |
883 | /* All NFSv2 writes are "file sync" writes */ | 887 | /* All NFSv2 writes are "file sync" writes */ |
884 | result->verf->committed = NFS_FILE_SYNC; | 888 | result->verf->committed = NFS_FILE_SYNC; |
885 | return decode_attrstat(xdr, result->fattr); | 889 | return decode_attrstat(xdr, result->fattr, &result->op_status); |
886 | } | 890 | } |
887 | 891 | ||
888 | /** | 892 | /** |
diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index 333ae4068506..e134d6548ab7 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h | |||
@@ -30,5 +30,7 @@ struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subver | |||
30 | struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, | 30 | struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, |
31 | struct nfs_fattr *, rpc_authflavor_t); | 31 | struct nfs_fattr *, rpc_authflavor_t); |
32 | 32 | ||
33 | /* nfs3super.c */ | ||
34 | extern struct nfs_subversion nfs_v3; | ||
33 | 35 | ||
34 | #endif /* __LINUX_FS_NFS_NFS3_FS_H */ | 36 | #endif /* __LINUX_FS_NFS_NFS3_FS_H */ |
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 8c1b437c5403..9e9fa347a948 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c | |||
@@ -1,5 +1,6 @@ | |||
1 | #include <linux/nfs_fs.h> | 1 | #include <linux/nfs_fs.h> |
2 | #include <linux/nfs_mount.h> | 2 | #include <linux/nfs_mount.h> |
3 | #include <linux/sunrpc/addr.h> | ||
3 | #include "internal.h" | 4 | #include "internal.h" |
4 | #include "nfs3_fs.h" | 5 | #include "nfs3_fs.h" |
5 | 6 | ||
@@ -64,3 +65,43 @@ struct nfs_server *nfs3_clone_server(struct nfs_server *source, | |||
64 | nfs_init_server_aclclient(server); | 65 | nfs_init_server_aclclient(server); |
65 | return server; | 66 | return server; |
66 | } | 67 | } |
68 | |||
69 | /* | ||
70 | * Set up a pNFS Data Server client over NFSv3. | ||
71 | * | ||
72 | * Return any existing nfs_client that matches server address,port,version | ||
73 | * and minorversion. | ||
74 | * | ||
75 | * For a new nfs_client, use a soft mount (default), a low retrans and a | ||
76 | * low timeout interval so that if a connection is lost, we retry through | ||
77 | * the MDS. | ||
78 | */ | ||
79 | struct nfs_client *nfs3_set_ds_client(struct nfs_client *mds_clp, | ||
80 | const struct sockaddr *ds_addr, int ds_addrlen, | ||
81 | int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, | ||
82 | rpc_authflavor_t au_flavor) | ||
83 | { | ||
84 | struct nfs_client_initdata cl_init = { | ||
85 | .addr = ds_addr, | ||
86 | .addrlen = ds_addrlen, | ||
87 | .nfs_mod = &nfs_v3, | ||
88 | .proto = ds_proto, | ||
89 | .net = mds_clp->cl_net, | ||
90 | }; | ||
91 | struct rpc_timeout ds_timeout; | ||
92 | struct nfs_client *clp; | ||
93 | char buf[INET6_ADDRSTRLEN + 1]; | ||
94 | |||
95 | /* fake a hostname because lockd wants it */ | ||
96 | if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0) | ||
97 | return ERR_PTR(-EINVAL); | ||
98 | cl_init.hostname = buf; | ||
99 | |||
100 | /* Use the MDS nfs_client cl_ipaddr. */ | ||
101 | nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); | ||
102 | clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, | ||
103 | au_flavor); | ||
104 | |||
105 | return clp; | ||
106 | } | ||
107 | EXPORT_SYMBOL_GPL(nfs3_set_ds_client); | ||
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 524f9f837408..78e557c3ab87 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -800,6 +800,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) | |||
800 | { | 800 | { |
801 | struct inode *inode = hdr->inode; | 801 | struct inode *inode = hdr->inode; |
802 | 802 | ||
803 | if (hdr->pgio_done_cb != NULL) | ||
804 | return hdr->pgio_done_cb(task, hdr); | ||
805 | |||
803 | if (nfs3_async_handle_jukebox(task, inode)) | 806 | if (nfs3_async_handle_jukebox(task, inode)) |
804 | return -EAGAIN; | 807 | return -EAGAIN; |
805 | 808 | ||
@@ -825,6 +828,9 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) | |||
825 | { | 828 | { |
826 | struct inode *inode = hdr->inode; | 829 | struct inode *inode = hdr->inode; |
827 | 830 | ||
831 | if (hdr->pgio_done_cb != NULL) | ||
832 | return hdr->pgio_done_cb(task, hdr); | ||
833 | |||
828 | if (nfs3_async_handle_jukebox(task, inode)) | 834 | if (nfs3_async_handle_jukebox(task, inode)) |
829 | return -EAGAIN; | 835 | return -EAGAIN; |
830 | if (task->tk_status >= 0) | 836 | if (task->tk_status >= 0) |
@@ -845,6 +851,9 @@ static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commi | |||
845 | 851 | ||
846 | static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data) | 852 | static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data) |
847 | { | 853 | { |
854 | if (data->commit_done_cb != NULL) | ||
855 | return data->commit_done_cb(task, data); | ||
856 | |||
848 | if (nfs3_async_handle_jukebox(task, data->inode)) | 857 | if (nfs3_async_handle_jukebox(task, data->inode)) |
849 | return -EAGAIN; | 858 | return -EAGAIN; |
850 | nfs_refresh_inode(data->inode, data->res.fattr); | 859 | nfs_refresh_inode(data->inode, data->res.fattr); |
diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index 6af29c2da352..5c4394e4656b 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c | |||
@@ -7,7 +7,7 @@ | |||
7 | #include "nfs3_fs.h" | 7 | #include "nfs3_fs.h" |
8 | #include "nfs.h" | 8 | #include "nfs.h" |
9 | 9 | ||
10 | static struct nfs_subversion nfs_v3 = { | 10 | struct nfs_subversion nfs_v3 = { |
11 | .owner = THIS_MODULE, | 11 | .owner = THIS_MODULE, |
12 | .nfs_fs = &nfs_fs_type, | 12 | .nfs_fs = &nfs_fs_type, |
13 | .rpc_vers = &nfs_version3, | 13 | .rpc_vers = &nfs_version3, |
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 8f4cbe7f4aa8..2a932fdc57cb 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
@@ -1636,6 +1636,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
1636 | error = decode_post_op_attr(xdr, result->fattr); | 1636 | error = decode_post_op_attr(xdr, result->fattr); |
1637 | if (unlikely(error)) | 1637 | if (unlikely(error)) |
1638 | goto out; | 1638 | goto out; |
1639 | result->op_status = status; | ||
1639 | if (status != NFS3_OK) | 1640 | if (status != NFS3_OK) |
1640 | goto out_status; | 1641 | goto out_status; |
1641 | error = decode_read3resok(xdr, result); | 1642 | error = decode_read3resok(xdr, result); |
@@ -1708,6 +1709,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
1708 | error = decode_wcc_data(xdr, result->fattr); | 1709 | error = decode_wcc_data(xdr, result->fattr); |
1709 | if (unlikely(error)) | 1710 | if (unlikely(error)) |
1710 | goto out; | 1711 | goto out; |
1712 | result->op_status = status; | ||
1711 | if (status != NFS3_OK) | 1713 | if (status != NFS3_OK) |
1712 | goto out_status; | 1714 | goto out_status; |
1713 | error = decode_write3resok(xdr, result); | 1715 | error = decode_write3resok(xdr, result); |
@@ -2323,6 +2325,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, | |||
2323 | error = decode_wcc_data(xdr, result->fattr); | 2325 | error = decode_wcc_data(xdr, result->fattr); |
2324 | if (unlikely(error)) | 2326 | if (unlikely(error)) |
2325 | goto out; | 2327 | goto out; |
2328 | result->op_status = status; | ||
2326 | if (status != NFS3_OK) | 2329 | if (status != NFS3_OK) |
2327 | goto out_status; | 2330 | goto out_status; |
2328 | error = decode_writeverf3(xdr, &result->verf->verifier); | 2331 | error = decode_writeverf3(xdr, &result->verf->verifier); |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a08178764cf9..fdef424b0cd3 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -44,6 +44,7 @@ enum nfs4_client_state { | |||
44 | #define NFS4_RENEW_TIMEOUT 0x01 | 44 | #define NFS4_RENEW_TIMEOUT 0x01 |
45 | #define NFS4_RENEW_DELEGATION_CB 0x02 | 45 | #define NFS4_RENEW_DELEGATION_CB 0x02 |
46 | 46 | ||
47 | struct nfs_seqid_counter; | ||
47 | struct nfs4_minor_version_ops { | 48 | struct nfs4_minor_version_ops { |
48 | u32 minor_version; | 49 | u32 minor_version; |
49 | unsigned init_caps; | 50 | unsigned init_caps; |
@@ -56,6 +57,8 @@ struct nfs4_minor_version_ops { | |||
56 | struct nfs_fsinfo *); | 57 | struct nfs_fsinfo *); |
57 | void (*free_lock_state)(struct nfs_server *, | 58 | void (*free_lock_state)(struct nfs_server *, |
58 | struct nfs4_lock_state *); | 59 | struct nfs4_lock_state *); |
60 | struct nfs_seqid * | ||
61 | (*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); | ||
59 | const struct rpc_call_ops *call_sync_ops; | 62 | const struct rpc_call_ops *call_sync_ops; |
60 | const struct nfs4_state_recovery_ops *reboot_recovery_ops; | 63 | const struct nfs4_state_recovery_ops *reboot_recovery_ops; |
61 | const struct nfs4_state_recovery_ops *nograce_recovery_ops; | 64 | const struct nfs4_state_recovery_ops *nograce_recovery_ops; |
@@ -443,6 +446,12 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); | |||
443 | extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); | 446 | extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); |
444 | extern void nfs_release_seqid(struct nfs_seqid *seqid); | 447 | extern void nfs_release_seqid(struct nfs_seqid *seqid); |
445 | extern void nfs_free_seqid(struct nfs_seqid *seqid); | 448 | extern void nfs_free_seqid(struct nfs_seqid *seqid); |
449 | extern int nfs40_setup_sequence(struct nfs4_slot_table *tbl, | ||
450 | struct nfs4_sequence_args *args, | ||
451 | struct nfs4_sequence_res *res, | ||
452 | struct rpc_task *task); | ||
453 | extern int nfs4_sequence_done(struct rpc_task *task, | ||
454 | struct nfs4_sequence_res *res); | ||
446 | 455 | ||
447 | extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp); | 456 | extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp); |
448 | 457 | ||
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 706ad10b8186..8646af9b11d2 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c | |||
@@ -849,14 +849,15 @@ error: | |||
849 | */ | 849 | */ |
850 | struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, | 850 | struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, |
851 | const struct sockaddr *ds_addr, int ds_addrlen, | 851 | const struct sockaddr *ds_addr, int ds_addrlen, |
852 | int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) | 852 | int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, |
853 | u32 minor_version, rpc_authflavor_t au_flavor) | ||
853 | { | 854 | { |
854 | struct nfs_client_initdata cl_init = { | 855 | struct nfs_client_initdata cl_init = { |
855 | .addr = ds_addr, | 856 | .addr = ds_addr, |
856 | .addrlen = ds_addrlen, | 857 | .addrlen = ds_addrlen, |
857 | .nfs_mod = &nfs_v4, | 858 | .nfs_mod = &nfs_v4, |
858 | .proto = ds_proto, | 859 | .proto = ds_proto, |
859 | .minorversion = mds_clp->cl_minorversion, | 860 | .minorversion = minor_version, |
860 | .net = mds_clp->cl_net, | 861 | .net = mds_clp->cl_net, |
861 | }; | 862 | }; |
862 | struct rpc_timeout ds_timeout; | 863 | struct rpc_timeout ds_timeout; |
@@ -874,7 +875,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, | |||
874 | */ | 875 | */ |
875 | nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); | 876 | nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); |
876 | clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, | 877 | clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, |
877 | mds_clp->cl_rpcclient->cl_auth->au_flavor); | 878 | au_flavor); |
878 | 879 | ||
879 | dprintk("<-- %s %p\n", __func__, clp); | 880 | dprintk("<-- %s %p\n", __func__, clp); |
880 | return clp; | 881 | return clp; |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c347705b0161..2e7c9f7a6f7c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -495,12 +495,11 @@ static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) | |||
495 | args->sa_privileged = 1; | 495 | args->sa_privileged = 1; |
496 | } | 496 | } |
497 | 497 | ||
498 | static int nfs40_setup_sequence(const struct nfs_server *server, | 498 | int nfs40_setup_sequence(struct nfs4_slot_table *tbl, |
499 | struct nfs4_sequence_args *args, | 499 | struct nfs4_sequence_args *args, |
500 | struct nfs4_sequence_res *res, | 500 | struct nfs4_sequence_res *res, |
501 | struct rpc_task *task) | 501 | struct rpc_task *task) |
502 | { | 502 | { |
503 | struct nfs4_slot_table *tbl = server->nfs_client->cl_slot_tbl; | ||
504 | struct nfs4_slot *slot; | 503 | struct nfs4_slot *slot; |
505 | 504 | ||
506 | /* slot already allocated? */ | 505 | /* slot already allocated? */ |
@@ -535,6 +534,7 @@ out_sleep: | |||
535 | spin_unlock(&tbl->slot_tbl_lock); | 534 | spin_unlock(&tbl->slot_tbl_lock); |
536 | return -EAGAIN; | 535 | return -EAGAIN; |
537 | } | 536 | } |
537 | EXPORT_SYMBOL_GPL(nfs40_setup_sequence); | ||
538 | 538 | ||
539 | static int nfs40_sequence_done(struct rpc_task *task, | 539 | static int nfs40_sequence_done(struct rpc_task *task, |
540 | struct nfs4_sequence_res *res) | 540 | struct nfs4_sequence_res *res) |
@@ -694,8 +694,7 @@ out_retry: | |||
694 | } | 694 | } |
695 | EXPORT_SYMBOL_GPL(nfs41_sequence_done); | 695 | EXPORT_SYMBOL_GPL(nfs41_sequence_done); |
696 | 696 | ||
697 | static int nfs4_sequence_done(struct rpc_task *task, | 697 | int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) |
698 | struct nfs4_sequence_res *res) | ||
699 | { | 698 | { |
700 | if (res->sr_slot == NULL) | 699 | if (res->sr_slot == NULL) |
701 | return 1; | 700 | return 1; |
@@ -703,6 +702,7 @@ static int nfs4_sequence_done(struct rpc_task *task, | |||
703 | return nfs40_sequence_done(task, res); | 702 | return nfs40_sequence_done(task, res); |
704 | return nfs41_sequence_done(task, res); | 703 | return nfs41_sequence_done(task, res); |
705 | } | 704 | } |
705 | EXPORT_SYMBOL_GPL(nfs4_sequence_done); | ||
706 | 706 | ||
707 | int nfs41_setup_sequence(struct nfs4_session *session, | 707 | int nfs41_setup_sequence(struct nfs4_session *session, |
708 | struct nfs4_sequence_args *args, | 708 | struct nfs4_sequence_args *args, |
@@ -777,7 +777,8 @@ static int nfs4_setup_sequence(const struct nfs_server *server, | |||
777 | int ret = 0; | 777 | int ret = 0; |
778 | 778 | ||
779 | if (!session) | 779 | if (!session) |
780 | return nfs40_setup_sequence(server, args, res, task); | 780 | return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, |
781 | args, res, task); | ||
781 | 782 | ||
782 | dprintk("--> %s clp %p session %p sr_slot %u\n", | 783 | dprintk("--> %s clp %p session %p sr_slot %u\n", |
783 | __func__, session->clp, session, res->sr_slot ? | 784 | __func__, session->clp, session, res->sr_slot ? |
@@ -818,14 +819,16 @@ static int nfs4_setup_sequence(const struct nfs_server *server, | |||
818 | struct nfs4_sequence_res *res, | 819 | struct nfs4_sequence_res *res, |
819 | struct rpc_task *task) | 820 | struct rpc_task *task) |
820 | { | 821 | { |
821 | return nfs40_setup_sequence(server, args, res, task); | 822 | return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, |
823 | args, res, task); | ||
822 | } | 824 | } |
823 | 825 | ||
824 | static int nfs4_sequence_done(struct rpc_task *task, | 826 | int nfs4_sequence_done(struct rpc_task *task, |
825 | struct nfs4_sequence_res *res) | 827 | struct nfs4_sequence_res *res) |
826 | { | 828 | { |
827 | return nfs40_sequence_done(task, res); | 829 | return nfs40_sequence_done(task, res); |
828 | } | 830 | } |
831 | EXPORT_SYMBOL_GPL(nfs4_sequence_done); | ||
829 | 832 | ||
830 | #endif /* !CONFIG_NFS_V4_1 */ | 833 | #endif /* !CONFIG_NFS_V4_1 */ |
831 | 834 | ||
@@ -937,6 +940,31 @@ static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server, | |||
937 | return true; | 940 | return true; |
938 | } | 941 | } |
939 | 942 | ||
943 | static u32 | ||
944 | nfs4_map_atomic_open_share(struct nfs_server *server, | ||
945 | fmode_t fmode, int openflags) | ||
946 | { | ||
947 | u32 res = 0; | ||
948 | |||
949 | switch (fmode & (FMODE_READ | FMODE_WRITE)) { | ||
950 | case FMODE_READ: | ||
951 | res = NFS4_SHARE_ACCESS_READ; | ||
952 | break; | ||
953 | case FMODE_WRITE: | ||
954 | res = NFS4_SHARE_ACCESS_WRITE; | ||
955 | break; | ||
956 | case FMODE_READ|FMODE_WRITE: | ||
957 | res = NFS4_SHARE_ACCESS_BOTH; | ||
958 | } | ||
959 | if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1)) | ||
960 | goto out; | ||
961 | /* Want no delegation if we're using O_DIRECT */ | ||
962 | if (openflags & O_DIRECT) | ||
963 | res |= NFS4_SHARE_WANT_NO_DELEG; | ||
964 | out: | ||
965 | return res; | ||
966 | } | ||
967 | |||
940 | static enum open_claim_type4 | 968 | static enum open_claim_type4 |
941 | nfs4_map_atomic_open_claim(struct nfs_server *server, | 969 | nfs4_map_atomic_open_claim(struct nfs_server *server, |
942 | enum open_claim_type4 claim) | 970 | enum open_claim_type4 claim) |
@@ -977,6 +1005,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | |||
977 | struct dentry *parent = dget_parent(dentry); | 1005 | struct dentry *parent = dget_parent(dentry); |
978 | struct inode *dir = parent->d_inode; | 1006 | struct inode *dir = parent->d_inode; |
979 | struct nfs_server *server = NFS_SERVER(dir); | 1007 | struct nfs_server *server = NFS_SERVER(dir); |
1008 | struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); | ||
980 | struct nfs4_opendata *p; | 1009 | struct nfs4_opendata *p; |
981 | 1010 | ||
982 | p = kzalloc(sizeof(*p), gfp_mask); | 1011 | p = kzalloc(sizeof(*p), gfp_mask); |
@@ -987,8 +1016,9 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | |||
987 | if (IS_ERR(p->f_label)) | 1016 | if (IS_ERR(p->f_label)) |
988 | goto err_free_p; | 1017 | goto err_free_p; |
989 | 1018 | ||
990 | p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); | 1019 | alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid; |
991 | if (p->o_arg.seqid == NULL) | 1020 | p->o_arg.seqid = alloc_seqid(&sp->so_seqid, gfp_mask); |
1021 | if (IS_ERR(p->o_arg.seqid)) | ||
992 | goto err_free_label; | 1022 | goto err_free_label; |
993 | nfs_sb_active(dentry->d_sb); | 1023 | nfs_sb_active(dentry->d_sb); |
994 | p->dentry = dget(dentry); | 1024 | p->dentry = dget(dentry); |
@@ -997,6 +1027,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | |||
997 | atomic_inc(&sp->so_count); | 1027 | atomic_inc(&sp->so_count); |
998 | p->o_arg.open_flags = flags; | 1028 | p->o_arg.open_flags = flags; |
999 | p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); | 1029 | p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); |
1030 | p->o_arg.share_access = nfs4_map_atomic_open_share(server, | ||
1031 | fmode, flags); | ||
1000 | /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS | 1032 | /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS |
1001 | * will return permission denied for all bits until close */ | 1033 | * will return permission denied for all bits until close */ |
1002 | if (!(flags & O_EXCL)) { | 1034 | if (!(flags & O_EXCL)) { |
@@ -1167,6 +1199,16 @@ static bool nfs_need_update_open_stateid(struct nfs4_state *state, | |||
1167 | return false; | 1199 | return false; |
1168 | } | 1200 | } |
1169 | 1201 | ||
1202 | static void nfs_resync_open_stateid_locked(struct nfs4_state *state) | ||
1203 | { | ||
1204 | if (state->n_wronly) | ||
1205 | set_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
1206 | if (state->n_rdonly) | ||
1207 | set_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
1208 | if (state->n_rdwr) | ||
1209 | set_bit(NFS_O_RDWR_STATE, &state->flags); | ||
1210 | } | ||
1211 | |||
1170 | static void nfs_clear_open_stateid_locked(struct nfs4_state *state, | 1212 | static void nfs_clear_open_stateid_locked(struct nfs4_state *state, |
1171 | nfs4_stateid *stateid, fmode_t fmode) | 1213 | nfs4_stateid *stateid, fmode_t fmode) |
1172 | { | 1214 | { |
@@ -1185,8 +1227,12 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state, | |||
1185 | } | 1227 | } |
1186 | if (stateid == NULL) | 1228 | if (stateid == NULL) |
1187 | return; | 1229 | return; |
1188 | if (!nfs_need_update_open_stateid(state, stateid)) | 1230 | /* Handle races with OPEN */ |
1231 | if (!nfs4_stateid_match_other(stateid, &state->open_stateid) || | ||
1232 | !nfs4_stateid_is_newer(stateid, &state->open_stateid)) { | ||
1233 | nfs_resync_open_stateid_locked(state); | ||
1189 | return; | 1234 | return; |
1235 | } | ||
1190 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) | 1236 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) |
1191 | nfs4_stateid_copy(&state->stateid, stateid); | 1237 | nfs4_stateid_copy(&state->stateid, stateid); |
1192 | nfs4_stateid_copy(&state->open_stateid, stateid); | 1238 | nfs4_stateid_copy(&state->open_stateid, stateid); |
@@ -1281,6 +1327,23 @@ no_delegation: | |||
1281 | return ret; | 1327 | return ret; |
1282 | } | 1328 | } |
1283 | 1329 | ||
1330 | static bool nfs4_update_lock_stateid(struct nfs4_lock_state *lsp, | ||
1331 | const nfs4_stateid *stateid) | ||
1332 | { | ||
1333 | struct nfs4_state *state = lsp->ls_state; | ||
1334 | bool ret = false; | ||
1335 | |||
1336 | spin_lock(&state->state_lock); | ||
1337 | if (!nfs4_stateid_match_other(stateid, &lsp->ls_stateid)) | ||
1338 | goto out_noupdate; | ||
1339 | if (!nfs4_stateid_is_newer(stateid, &lsp->ls_stateid)) | ||
1340 | goto out_noupdate; | ||
1341 | nfs4_stateid_copy(&lsp->ls_stateid, stateid); | ||
1342 | ret = true; | ||
1343 | out_noupdate: | ||
1344 | spin_unlock(&state->state_lock); | ||
1345 | return ret; | ||
1346 | } | ||
1284 | 1347 | ||
1285 | static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmode) | 1348 | static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmode) |
1286 | { | 1349 | { |
@@ -1679,8 +1742,8 @@ static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) | |||
1679 | { | 1742 | { |
1680 | struct nfs4_opendata *data = calldata; | 1743 | struct nfs4_opendata *data = calldata; |
1681 | 1744 | ||
1682 | nfs40_setup_sequence(data->o_arg.server, &data->c_arg.seq_args, | 1745 | nfs40_setup_sequence(data->o_arg.server->nfs_client->cl_slot_tbl, |
1683 | &data->c_res.seq_res, task); | 1746 | &data->c_arg.seq_args, &data->c_res.seq_res, task); |
1684 | } | 1747 | } |
1685 | 1748 | ||
1686 | static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) | 1749 | static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) |
@@ -2587,6 +2650,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
2587 | case -NFS4ERR_OLD_STATEID: | 2650 | case -NFS4ERR_OLD_STATEID: |
2588 | case -NFS4ERR_BAD_STATEID: | 2651 | case -NFS4ERR_BAD_STATEID: |
2589 | case -NFS4ERR_EXPIRED: | 2652 | case -NFS4ERR_EXPIRED: |
2653 | if (!nfs4_stateid_match(&calldata->arg.stateid, | ||
2654 | &state->stateid)) { | ||
2655 | rpc_restart_call_prepare(task); | ||
2656 | goto out_release; | ||
2657 | } | ||
2590 | if (calldata->arg.fmode == 0) | 2658 | if (calldata->arg.fmode == 0) |
2591 | break; | 2659 | break; |
2592 | default: | 2660 | default: |
@@ -2619,6 +2687,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) | |||
2619 | is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags); | 2687 | is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags); |
2620 | is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags); | 2688 | is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags); |
2621 | is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags); | 2689 | is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags); |
2690 | nfs4_stateid_copy(&calldata->arg.stateid, &state->stateid); | ||
2622 | /* Calculate the change in open mode */ | 2691 | /* Calculate the change in open mode */ |
2623 | calldata->arg.fmode = 0; | 2692 | calldata->arg.fmode = 0; |
2624 | if (state->n_rdwr == 0) { | 2693 | if (state->n_rdwr == 0) { |
@@ -2653,6 +2722,9 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) | |||
2653 | goto out_wait; | 2722 | goto out_wait; |
2654 | } | 2723 | } |
2655 | } | 2724 | } |
2725 | calldata->arg.share_access = | ||
2726 | nfs4_map_atomic_open_share(NFS_SERVER(inode), | ||
2727 | calldata->arg.fmode, 0); | ||
2656 | 2728 | ||
2657 | nfs_fattr_init(calldata->res.fattr); | 2729 | nfs_fattr_init(calldata->res.fattr); |
2658 | calldata->timestamp = jiffies; | 2730 | calldata->timestamp = jiffies; |
@@ -2675,45 +2747,10 @@ static const struct rpc_call_ops nfs4_close_ops = { | |||
2675 | .rpc_release = nfs4_free_closedata, | 2747 | .rpc_release = nfs4_free_closedata, |
2676 | }; | 2748 | }; |
2677 | 2749 | ||
2678 | static bool nfs4_state_has_opener(struct nfs4_state *state) | ||
2679 | { | ||
2680 | /* first check existing openers */ | ||
2681 | if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 && | ||
2682 | state->n_rdonly != 0) | ||
2683 | return true; | ||
2684 | |||
2685 | if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0 && | ||
2686 | state->n_wronly != 0) | ||
2687 | return true; | ||
2688 | |||
2689 | if (test_bit(NFS_O_RDWR_STATE, &state->flags) != 0 && | ||
2690 | state->n_rdwr != 0) | ||
2691 | return true; | ||
2692 | |||
2693 | return false; | ||
2694 | } | ||
2695 | |||
2696 | static bool nfs4_roc(struct inode *inode) | 2750 | static bool nfs4_roc(struct inode *inode) |
2697 | { | 2751 | { |
2698 | struct nfs_inode *nfsi = NFS_I(inode); | 2752 | if (!nfs_have_layout(inode)) |
2699 | struct nfs_open_context *ctx; | ||
2700 | struct nfs4_state *state; | ||
2701 | |||
2702 | spin_lock(&inode->i_lock); | ||
2703 | list_for_each_entry(ctx, &nfsi->open_files, list) { | ||
2704 | state = ctx->state; | ||
2705 | if (state == NULL) | ||
2706 | continue; | ||
2707 | if (nfs4_state_has_opener(state)) { | ||
2708 | spin_unlock(&inode->i_lock); | ||
2709 | return false; | ||
2710 | } | ||
2711 | } | ||
2712 | spin_unlock(&inode->i_lock); | ||
2713 | |||
2714 | if (nfs4_check_delegation(inode, FMODE_READ)) | ||
2715 | return false; | 2753 | return false; |
2716 | |||
2717 | return pnfs_roc(inode); | 2754 | return pnfs_roc(inode); |
2718 | } | 2755 | } |
2719 | 2756 | ||
@@ -2731,6 +2768,7 @@ static bool nfs4_roc(struct inode *inode) | |||
2731 | int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) | 2768 | int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) |
2732 | { | 2769 | { |
2733 | struct nfs_server *server = NFS_SERVER(state->inode); | 2770 | struct nfs_server *server = NFS_SERVER(state->inode); |
2771 | struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); | ||
2734 | struct nfs4_closedata *calldata; | 2772 | struct nfs4_closedata *calldata; |
2735 | struct nfs4_state_owner *sp = state->owner; | 2773 | struct nfs4_state_owner *sp = state->owner; |
2736 | struct rpc_task *task; | 2774 | struct rpc_task *task; |
@@ -2757,10 +2795,10 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) | |||
2757 | calldata->inode = state->inode; | 2795 | calldata->inode = state->inode; |
2758 | calldata->state = state; | 2796 | calldata->state = state; |
2759 | calldata->arg.fh = NFS_FH(state->inode); | 2797 | calldata->arg.fh = NFS_FH(state->inode); |
2760 | calldata->arg.stateid = &state->open_stateid; | ||
2761 | /* Serialization for the sequence id */ | 2798 | /* Serialization for the sequence id */ |
2762 | calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask); | 2799 | alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid; |
2763 | if (calldata->arg.seqid == NULL) | 2800 | calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask); |
2801 | if (IS_ERR(calldata->arg.seqid)) | ||
2764 | goto out_free_calldata; | 2802 | goto out_free_calldata; |
2765 | calldata->arg.fmode = 0; | 2803 | calldata->arg.fmode = 0; |
2766 | calldata->arg.bitmask = server->cache_consistency_bitmask; | 2804 | calldata->arg.bitmask = server->cache_consistency_bitmask; |
@@ -5137,9 +5175,13 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) | |||
5137 | static void nfs4_delegreturn_release(void *calldata) | 5175 | static void nfs4_delegreturn_release(void *calldata) |
5138 | { | 5176 | { |
5139 | struct nfs4_delegreturndata *data = calldata; | 5177 | struct nfs4_delegreturndata *data = calldata; |
5178 | struct inode *inode = data->inode; | ||
5140 | 5179 | ||
5141 | if (data->roc) | 5180 | if (inode) { |
5142 | pnfs_roc_release(data->inode); | 5181 | if (data->roc) |
5182 | pnfs_roc_release(inode); | ||
5183 | nfs_iput_and_deactive(inode); | ||
5184 | } | ||
5143 | kfree(calldata); | 5185 | kfree(calldata); |
5144 | } | 5186 | } |
5145 | 5187 | ||
@@ -5196,9 +5238,9 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co | |||
5196 | nfs_fattr_init(data->res.fattr); | 5238 | nfs_fattr_init(data->res.fattr); |
5197 | data->timestamp = jiffies; | 5239 | data->timestamp = jiffies; |
5198 | data->rpc_status = 0; | 5240 | data->rpc_status = 0; |
5199 | data->inode = inode; | 5241 | data->inode = nfs_igrab_and_active(inode); |
5200 | data->roc = list_empty(&NFS_I(inode)->open_files) ? | 5242 | if (data->inode) |
5201 | pnfs_roc(inode) : false; | 5243 | data->roc = nfs4_roc(inode); |
5202 | 5244 | ||
5203 | task_setup_data.callback_data = data; | 5245 | task_setup_data.callback_data = data; |
5204 | msg.rpc_argp = &data->args; | 5246 | msg.rpc_argp = &data->args; |
@@ -5353,7 +5395,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, | |||
5353 | p->arg.fl = &p->fl; | 5395 | p->arg.fl = &p->fl; |
5354 | p->arg.seqid = seqid; | 5396 | p->arg.seqid = seqid; |
5355 | p->res.seqid = seqid; | 5397 | p->res.seqid = seqid; |
5356 | p->arg.stateid = &lsp->ls_stateid; | ||
5357 | p->lsp = lsp; | 5398 | p->lsp = lsp; |
5358 | atomic_inc(&lsp->ls_count); | 5399 | atomic_inc(&lsp->ls_count); |
5359 | /* Ensure we don't close file until we're done freeing locks! */ | 5400 | /* Ensure we don't close file until we're done freeing locks! */ |
@@ -5380,14 +5421,18 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) | |||
5380 | return; | 5421 | return; |
5381 | switch (task->tk_status) { | 5422 | switch (task->tk_status) { |
5382 | case 0: | 5423 | case 0: |
5383 | nfs4_stateid_copy(&calldata->lsp->ls_stateid, | ||
5384 | &calldata->res.stateid); | ||
5385 | renew_lease(calldata->server, calldata->timestamp); | 5424 | renew_lease(calldata->server, calldata->timestamp); |
5386 | break; | 5425 | do_vfs_lock(calldata->fl.fl_file, &calldata->fl); |
5426 | if (nfs4_update_lock_stateid(calldata->lsp, | ||
5427 | &calldata->res.stateid)) | ||
5428 | break; | ||
5387 | case -NFS4ERR_BAD_STATEID: | 5429 | case -NFS4ERR_BAD_STATEID: |
5388 | case -NFS4ERR_OLD_STATEID: | 5430 | case -NFS4ERR_OLD_STATEID: |
5389 | case -NFS4ERR_STALE_STATEID: | 5431 | case -NFS4ERR_STALE_STATEID: |
5390 | case -NFS4ERR_EXPIRED: | 5432 | case -NFS4ERR_EXPIRED: |
5433 | if (!nfs4_stateid_match(&calldata->arg.stateid, | ||
5434 | &calldata->lsp->ls_stateid)) | ||
5435 | rpc_restart_call_prepare(task); | ||
5391 | break; | 5436 | break; |
5392 | default: | 5437 | default: |
5393 | if (nfs4_async_handle_error(task, calldata->server, | 5438 | if (nfs4_async_handle_error(task, calldata->server, |
@@ -5403,6 +5448,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) | |||
5403 | 5448 | ||
5404 | if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) | 5449 | if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) |
5405 | goto out_wait; | 5450 | goto out_wait; |
5451 | nfs4_stateid_copy(&calldata->arg.stateid, &calldata->lsp->ls_stateid); | ||
5406 | if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { | 5452 | if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { |
5407 | /* Note: exit _without_ running nfs4_locku_done */ | 5453 | /* Note: exit _without_ running nfs4_locku_done */ |
5408 | goto out_no_action; | 5454 | goto out_no_action; |
@@ -5473,6 +5519,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * | |||
5473 | struct nfs_seqid *seqid; | 5519 | struct nfs_seqid *seqid; |
5474 | struct nfs4_lock_state *lsp; | 5520 | struct nfs4_lock_state *lsp; |
5475 | struct rpc_task *task; | 5521 | struct rpc_task *task; |
5522 | struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); | ||
5476 | int status = 0; | 5523 | int status = 0; |
5477 | unsigned char fl_flags = request->fl_flags; | 5524 | unsigned char fl_flags = request->fl_flags; |
5478 | 5525 | ||
@@ -5496,9 +5543,10 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * | |||
5496 | lsp = request->fl_u.nfs4_fl.owner; | 5543 | lsp = request->fl_u.nfs4_fl.owner; |
5497 | if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) == 0) | 5544 | if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) == 0) |
5498 | goto out; | 5545 | goto out; |
5499 | seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL); | 5546 | alloc_seqid = NFS_SERVER(inode)->nfs_client->cl_mvops->alloc_seqid; |
5547 | seqid = alloc_seqid(&lsp->ls_seqid, GFP_KERNEL); | ||
5500 | status = -ENOMEM; | 5548 | status = -ENOMEM; |
5501 | if (seqid == NULL) | 5549 | if (IS_ERR(seqid)) |
5502 | goto out; | 5550 | goto out; |
5503 | task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid); | 5551 | task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid); |
5504 | status = PTR_ERR(task); | 5552 | status = PTR_ERR(task); |
@@ -5531,6 +5579,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, | |||
5531 | struct nfs4_lockdata *p; | 5579 | struct nfs4_lockdata *p; |
5532 | struct inode *inode = lsp->ls_state->inode; | 5580 | struct inode *inode = lsp->ls_state->inode; |
5533 | struct nfs_server *server = NFS_SERVER(inode); | 5581 | struct nfs_server *server = NFS_SERVER(inode); |
5582 | struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); | ||
5534 | 5583 | ||
5535 | p = kzalloc(sizeof(*p), gfp_mask); | 5584 | p = kzalloc(sizeof(*p), gfp_mask); |
5536 | if (p == NULL) | 5585 | if (p == NULL) |
@@ -5539,12 +5588,12 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, | |||
5539 | p->arg.fh = NFS_FH(inode); | 5588 | p->arg.fh = NFS_FH(inode); |
5540 | p->arg.fl = &p->fl; | 5589 | p->arg.fl = &p->fl; |
5541 | p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid, gfp_mask); | 5590 | p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid, gfp_mask); |
5542 | if (p->arg.open_seqid == NULL) | 5591 | if (IS_ERR(p->arg.open_seqid)) |
5543 | goto out_free; | 5592 | goto out_free; |
5544 | p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask); | 5593 | alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid; |
5545 | if (p->arg.lock_seqid == NULL) | 5594 | p->arg.lock_seqid = alloc_seqid(&lsp->ls_seqid, gfp_mask); |
5595 | if (IS_ERR(p->arg.lock_seqid)) | ||
5546 | goto out_free_seqid; | 5596 | goto out_free_seqid; |
5547 | p->arg.lock_stateid = &lsp->ls_stateid; | ||
5548 | p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; | 5597 | p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; |
5549 | p->arg.lock_owner.id = lsp->ls_seqid.owner_id; | 5598 | p->arg.lock_owner.id = lsp->ls_seqid.owner_id; |
5550 | p->arg.lock_owner.s_dev = server->s_dev; | 5599 | p->arg.lock_owner.s_dev = server->s_dev; |
@@ -5571,15 +5620,19 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) | |||
5571 | if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) | 5620 | if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) |
5572 | goto out_wait; | 5621 | goto out_wait; |
5573 | /* Do we need to do an open_to_lock_owner? */ | 5622 | /* Do we need to do an open_to_lock_owner? */ |
5574 | if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { | 5623 | if (!test_bit(NFS_LOCK_INITIALIZED, &data->lsp->ls_flags)) { |
5575 | if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { | 5624 | if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { |
5576 | goto out_release_lock_seqid; | 5625 | goto out_release_lock_seqid; |
5577 | } | 5626 | } |
5578 | data->arg.open_stateid = &state->open_stateid; | 5627 | nfs4_stateid_copy(&data->arg.open_stateid, |
5628 | &state->open_stateid); | ||
5579 | data->arg.new_lock_owner = 1; | 5629 | data->arg.new_lock_owner = 1; |
5580 | data->res.open_seqid = data->arg.open_seqid; | 5630 | data->res.open_seqid = data->arg.open_seqid; |
5581 | } else | 5631 | } else { |
5582 | data->arg.new_lock_owner = 0; | 5632 | data->arg.new_lock_owner = 0; |
5633 | nfs4_stateid_copy(&data->arg.lock_stateid, | ||
5634 | &data->lsp->ls_stateid); | ||
5635 | } | ||
5583 | if (!nfs4_valid_open_stateid(state)) { | 5636 | if (!nfs4_valid_open_stateid(state)) { |
5584 | data->rpc_status = -EBADF; | 5637 | data->rpc_status = -EBADF; |
5585 | task->tk_action = NULL; | 5638 | task->tk_action = NULL; |
@@ -5603,6 +5656,7 @@ out_wait: | |||
5603 | static void nfs4_lock_done(struct rpc_task *task, void *calldata) | 5656 | static void nfs4_lock_done(struct rpc_task *task, void *calldata) |
5604 | { | 5657 | { |
5605 | struct nfs4_lockdata *data = calldata; | 5658 | struct nfs4_lockdata *data = calldata; |
5659 | struct nfs4_lock_state *lsp = data->lsp; | ||
5606 | 5660 | ||
5607 | dprintk("%s: begin!\n", __func__); | 5661 | dprintk("%s: begin!\n", __func__); |
5608 | 5662 | ||
@@ -5610,18 +5664,36 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) | |||
5610 | return; | 5664 | return; |
5611 | 5665 | ||
5612 | data->rpc_status = task->tk_status; | 5666 | data->rpc_status = task->tk_status; |
5613 | if (data->arg.new_lock_owner != 0) { | 5667 | switch (task->tk_status) { |
5614 | if (data->rpc_status == 0) | 5668 | case 0: |
5615 | nfs_confirm_seqid(&data->lsp->ls_seqid, 0); | 5669 | renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), |
5616 | else | 5670 | data->timestamp); |
5617 | goto out; | 5671 | if (data->arg.new_lock) { |
5618 | } | 5672 | data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); |
5619 | if (data->rpc_status == 0) { | 5673 | if (do_vfs_lock(data->fl.fl_file, &data->fl) < 0) { |
5620 | nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid); | 5674 | rpc_restart_call_prepare(task); |
5621 | set_bit(NFS_LOCK_INITIALIZED, &data->lsp->ls_flags); | 5675 | break; |
5622 | renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); | 5676 | } |
5677 | } | ||
5678 | if (data->arg.new_lock_owner != 0) { | ||
5679 | nfs_confirm_seqid(&lsp->ls_seqid, 0); | ||
5680 | nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid); | ||
5681 | set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); | ||
5682 | } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) | ||
5683 | rpc_restart_call_prepare(task); | ||
5684 | break; | ||
5685 | case -NFS4ERR_BAD_STATEID: | ||
5686 | case -NFS4ERR_OLD_STATEID: | ||
5687 | case -NFS4ERR_STALE_STATEID: | ||
5688 | case -NFS4ERR_EXPIRED: | ||
5689 | if (data->arg.new_lock_owner != 0) { | ||
5690 | if (!nfs4_stateid_match(&data->arg.open_stateid, | ||
5691 | &lsp->ls_state->open_stateid)) | ||
5692 | rpc_restart_call_prepare(task); | ||
5693 | } else if (!nfs4_stateid_match(&data->arg.lock_stateid, | ||
5694 | &lsp->ls_stateid)) | ||
5695 | rpc_restart_call_prepare(task); | ||
5623 | } | 5696 | } |
5624 | out: | ||
5625 | dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); | 5697 | dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); |
5626 | } | 5698 | } |
5627 | 5699 | ||
@@ -5702,7 +5774,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f | |||
5702 | if (recovery_type == NFS_LOCK_RECLAIM) | 5774 | if (recovery_type == NFS_LOCK_RECLAIM) |
5703 | data->arg.reclaim = NFS_LOCK_RECLAIM; | 5775 | data->arg.reclaim = NFS_LOCK_RECLAIM; |
5704 | nfs4_set_sequence_privileged(&data->arg.seq_args); | 5776 | nfs4_set_sequence_privileged(&data->arg.seq_args); |
5705 | } | 5777 | } else |
5778 | data->arg.new_lock = 1; | ||
5706 | task = rpc_run_task(&task_setup_data); | 5779 | task = rpc_run_task(&task_setup_data); |
5707 | if (IS_ERR(task)) | 5780 | if (IS_ERR(task)) |
5708 | return PTR_ERR(task); | 5781 | return PTR_ERR(task); |
@@ -5826,10 +5899,8 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques | |||
5826 | 5899 | ||
5827 | static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) | 5900 | static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) |
5828 | { | 5901 | { |
5829 | struct nfs4_state_owner *sp = state->owner; | ||
5830 | struct nfs_inode *nfsi = NFS_I(state->inode); | 5902 | struct nfs_inode *nfsi = NFS_I(state->inode); |
5831 | unsigned char fl_flags = request->fl_flags; | 5903 | unsigned char fl_flags = request->fl_flags; |
5832 | unsigned int seq; | ||
5833 | int status = -ENOLCK; | 5904 | int status = -ENOLCK; |
5834 | 5905 | ||
5835 | if ((fl_flags & FL_POSIX) && | 5906 | if ((fl_flags & FL_POSIX) && |
@@ -5849,25 +5920,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock | |||
5849 | /* ...but avoid races with delegation recall... */ | 5920 | /* ...but avoid races with delegation recall... */ |
5850 | request->fl_flags = fl_flags & ~FL_SLEEP; | 5921 | request->fl_flags = fl_flags & ~FL_SLEEP; |
5851 | status = do_vfs_lock(request->fl_file, request); | 5922 | status = do_vfs_lock(request->fl_file, request); |
5852 | goto out_unlock; | 5923 | up_read(&nfsi->rwsem); |
5853 | } | ||
5854 | seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); | ||
5855 | up_read(&nfsi->rwsem); | ||
5856 | status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); | ||
5857 | if (status != 0) | ||
5858 | goto out; | 5924 | goto out; |
5859 | down_read(&nfsi->rwsem); | ||
5860 | if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) { | ||
5861 | status = -NFS4ERR_DELAY; | ||
5862 | goto out_unlock; | ||
5863 | } | 5925 | } |
5864 | /* Note: we always want to sleep here! */ | ||
5865 | request->fl_flags = fl_flags | FL_SLEEP; | ||
5866 | if (do_vfs_lock(request->fl_file, request) < 0) | ||
5867 | printk(KERN_WARNING "NFS: %s: VFS is out of sync with lock " | ||
5868 | "manager!\n", __func__); | ||
5869 | out_unlock: | ||
5870 | up_read(&nfsi->rwsem); | 5926 | up_read(&nfsi->rwsem); |
5927 | status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); | ||
5871 | out: | 5928 | out: |
5872 | request->fl_flags = fl_flags; | 5929 | request->fl_flags = fl_flags; |
5873 | return status; | 5930 | return status; |
@@ -5974,8 +6031,8 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata | |||
5974 | { | 6031 | { |
5975 | struct nfs_release_lockowner_data *data = calldata; | 6032 | struct nfs_release_lockowner_data *data = calldata; |
5976 | struct nfs_server *server = data->server; | 6033 | struct nfs_server *server = data->server; |
5977 | nfs40_setup_sequence(server, &data->args.seq_args, | 6034 | nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, |
5978 | &data->res.seq_res, task); | 6035 | &data->args.seq_args, &data->res.seq_res, task); |
5979 | data->args.lock_owner.clientid = server->nfs_client->cl_clientid; | 6036 | data->args.lock_owner.clientid = server->nfs_client->cl_clientid; |
5980 | data->timestamp = jiffies; | 6037 | data->timestamp = jiffies; |
5981 | } | 6038 | } |
@@ -7537,6 +7594,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) | |||
7537 | return; | 7594 | return; |
7538 | if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, | 7595 | if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, |
7539 | NFS_I(lgp->args.inode)->layout, | 7596 | NFS_I(lgp->args.inode)->layout, |
7597 | &lgp->args.range, | ||
7540 | lgp->args.ctx->state)) { | 7598 | lgp->args.ctx->state)) { |
7541 | rpc_exit(task, NFS4_OK); | 7599 | rpc_exit(task, NFS4_OK); |
7542 | } | 7600 | } |
@@ -7792,9 +7850,13 @@ static void nfs4_layoutreturn_release(void *calldata) | |||
7792 | spin_lock(&lo->plh_inode->i_lock); | 7850 | spin_lock(&lo->plh_inode->i_lock); |
7793 | if (lrp->res.lrs_present) | 7851 | if (lrp->res.lrs_present) |
7794 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); | 7852 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); |
7853 | pnfs_clear_layoutreturn_waitbit(lo); | ||
7854 | clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); | ||
7855 | rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); | ||
7795 | lo->plh_block_lgets--; | 7856 | lo->plh_block_lgets--; |
7796 | spin_unlock(&lo->plh_inode->i_lock); | 7857 | spin_unlock(&lo->plh_inode->i_lock); |
7797 | pnfs_put_layout_hdr(lrp->args.layout); | 7858 | pnfs_put_layout_hdr(lrp->args.layout); |
7859 | nfs_iput_and_deactive(lrp->inode); | ||
7798 | kfree(calldata); | 7860 | kfree(calldata); |
7799 | dprintk("<-- %s\n", __func__); | 7861 | dprintk("<-- %s\n", __func__); |
7800 | } | 7862 | } |
@@ -7805,7 +7867,7 @@ static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { | |||
7805 | .rpc_release = nfs4_layoutreturn_release, | 7867 | .rpc_release = nfs4_layoutreturn_release, |
7806 | }; | 7868 | }; |
7807 | 7869 | ||
7808 | int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) | 7870 | int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) |
7809 | { | 7871 | { |
7810 | struct rpc_task *task; | 7872 | struct rpc_task *task; |
7811 | struct rpc_message msg = { | 7873 | struct rpc_message msg = { |
@@ -7820,14 +7882,23 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) | |||
7820 | .callback_ops = &nfs4_layoutreturn_call_ops, | 7882 | .callback_ops = &nfs4_layoutreturn_call_ops, |
7821 | .callback_data = lrp, | 7883 | .callback_data = lrp, |
7822 | }; | 7884 | }; |
7823 | int status; | 7885 | int status = 0; |
7824 | 7886 | ||
7825 | dprintk("--> %s\n", __func__); | 7887 | dprintk("--> %s\n", __func__); |
7888 | if (!sync) { | ||
7889 | lrp->inode = nfs_igrab_and_active(lrp->args.inode); | ||
7890 | if (!lrp->inode) { | ||
7891 | nfs4_layoutreturn_release(lrp); | ||
7892 | return -EAGAIN; | ||
7893 | } | ||
7894 | task_setup_data.flags |= RPC_TASK_ASYNC; | ||
7895 | } | ||
7826 | nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); | 7896 | nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); |
7827 | task = rpc_run_task(&task_setup_data); | 7897 | task = rpc_run_task(&task_setup_data); |
7828 | if (IS_ERR(task)) | 7898 | if (IS_ERR(task)) |
7829 | return PTR_ERR(task); | 7899 | return PTR_ERR(task); |
7830 | status = task->tk_status; | 7900 | if (sync) |
7901 | status = task->tk_status; | ||
7831 | trace_nfs4_layoutreturn(lrp->args.inode, status); | 7902 | trace_nfs4_layoutreturn(lrp->args.inode, status); |
7832 | dprintk("<-- %s status=%d\n", __func__, status); | 7903 | dprintk("<-- %s status=%d\n", __func__, status); |
7833 | rpc_put_task(task); | 7904 | rpc_put_task(task); |
@@ -7921,6 +7992,7 @@ static void nfs4_layoutcommit_release(void *calldata) | |||
7921 | nfs_post_op_update_inode_force_wcc(data->args.inode, | 7992 | nfs_post_op_update_inode_force_wcc(data->args.inode, |
7922 | data->res.fattr); | 7993 | data->res.fattr); |
7923 | put_rpccred(data->cred); | 7994 | put_rpccred(data->cred); |
7995 | nfs_iput_and_deactive(data->inode); | ||
7924 | kfree(data); | 7996 | kfree(data); |
7925 | } | 7997 | } |
7926 | 7998 | ||
@@ -7945,7 +8017,6 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) | |||
7945 | .rpc_message = &msg, | 8017 | .rpc_message = &msg, |
7946 | .callback_ops = &nfs4_layoutcommit_ops, | 8018 | .callback_ops = &nfs4_layoutcommit_ops, |
7947 | .callback_data = data, | 8019 | .callback_data = data, |
7948 | .flags = RPC_TASK_ASYNC, | ||
7949 | }; | 8020 | }; |
7950 | struct rpc_task *task; | 8021 | struct rpc_task *task; |
7951 | int status = 0; | 8022 | int status = 0; |
@@ -7956,18 +8027,21 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) | |||
7956 | data->args.lastbytewritten, | 8027 | data->args.lastbytewritten, |
7957 | data->args.inode->i_ino); | 8028 | data->args.inode->i_ino); |
7958 | 8029 | ||
8030 | if (!sync) { | ||
8031 | data->inode = nfs_igrab_and_active(data->args.inode); | ||
8032 | if (data->inode == NULL) { | ||
8033 | nfs4_layoutcommit_release(data); | ||
8034 | return -EAGAIN; | ||
8035 | } | ||
8036 | task_setup_data.flags = RPC_TASK_ASYNC; | ||
8037 | } | ||
7959 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); | 8038 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); |
7960 | task = rpc_run_task(&task_setup_data); | 8039 | task = rpc_run_task(&task_setup_data); |
7961 | if (IS_ERR(task)) | 8040 | if (IS_ERR(task)) |
7962 | return PTR_ERR(task); | 8041 | return PTR_ERR(task); |
7963 | if (sync == false) | 8042 | if (sync) |
7964 | goto out; | 8043 | status = task->tk_status; |
7965 | status = nfs4_wait_for_completion_rpc_task(task); | ||
7966 | if (status != 0) | ||
7967 | goto out; | ||
7968 | status = task->tk_status; | ||
7969 | trace_nfs4_layoutcommit(data->args.inode, status); | 8044 | trace_nfs4_layoutcommit(data->args.inode, status); |
7970 | out: | ||
7971 | dprintk("%s: status %d\n", __func__, status); | 8045 | dprintk("%s: status %d\n", __func__, status); |
7972 | rpc_put_task(task); | 8046 | rpc_put_task(task); |
7973 | return status; | 8047 | return status; |
@@ -8395,6 +8469,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { | |||
8395 | .match_stateid = nfs4_match_stateid, | 8469 | .match_stateid = nfs4_match_stateid, |
8396 | .find_root_sec = nfs4_find_root_sec, | 8470 | .find_root_sec = nfs4_find_root_sec, |
8397 | .free_lock_state = nfs4_release_lockowner, | 8471 | .free_lock_state = nfs4_release_lockowner, |
8472 | .alloc_seqid = nfs_alloc_seqid, | ||
8398 | .call_sync_ops = &nfs40_call_sync_ops, | 8473 | .call_sync_ops = &nfs40_call_sync_ops, |
8399 | .reboot_recovery_ops = &nfs40_reboot_recovery_ops, | 8474 | .reboot_recovery_ops = &nfs40_reboot_recovery_ops, |
8400 | .nograce_recovery_ops = &nfs40_nograce_recovery_ops, | 8475 | .nograce_recovery_ops = &nfs40_nograce_recovery_ops, |
@@ -8403,6 +8478,12 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { | |||
8403 | }; | 8478 | }; |
8404 | 8479 | ||
8405 | #if defined(CONFIG_NFS_V4_1) | 8480 | #if defined(CONFIG_NFS_V4_1) |
8481 | static struct nfs_seqid * | ||
8482 | nfs_alloc_no_seqid(struct nfs_seqid_counter *arg1, gfp_t arg2) | ||
8483 | { | ||
8484 | return NULL; | ||
8485 | } | ||
8486 | |||
8406 | static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | 8487 | static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { |
8407 | .minor_version = 1, | 8488 | .minor_version = 1, |
8408 | .init_caps = NFS_CAP_READDIRPLUS | 8489 | .init_caps = NFS_CAP_READDIRPLUS |
@@ -8416,6 +8497,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | |||
8416 | .match_stateid = nfs41_match_stateid, | 8497 | .match_stateid = nfs41_match_stateid, |
8417 | .find_root_sec = nfs41_find_root_sec, | 8498 | .find_root_sec = nfs41_find_root_sec, |
8418 | .free_lock_state = nfs41_free_lock_state, | 8499 | .free_lock_state = nfs41_free_lock_state, |
8500 | .alloc_seqid = nfs_alloc_no_seqid, | ||
8419 | .call_sync_ops = &nfs41_call_sync_ops, | 8501 | .call_sync_ops = &nfs41_call_sync_ops, |
8420 | .reboot_recovery_ops = &nfs41_reboot_recovery_ops, | 8502 | .reboot_recovery_ops = &nfs41_reboot_recovery_ops, |
8421 | .nograce_recovery_ops = &nfs41_nograce_recovery_ops, | 8503 | .nograce_recovery_ops = &nfs41_nograce_recovery_ops, |
@@ -8442,6 +8524,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | |||
8442 | .find_root_sec = nfs41_find_root_sec, | 8524 | .find_root_sec = nfs41_find_root_sec, |
8443 | .free_lock_state = nfs41_free_lock_state, | 8525 | .free_lock_state = nfs41_free_lock_state, |
8444 | .call_sync_ops = &nfs41_call_sync_ops, | 8526 | .call_sync_ops = &nfs41_call_sync_ops, |
8527 | .alloc_seqid = nfs_alloc_no_seqid, | ||
8445 | .reboot_recovery_ops = &nfs41_reboot_recovery_ops, | 8528 | .reboot_recovery_ops = &nfs41_reboot_recovery_ops, |
8446 | .nograce_recovery_ops = &nfs41_nograce_recovery_ops, | 8529 | .nograce_recovery_ops = &nfs41_nograce_recovery_ops, |
8447 | .state_renewal_ops = &nfs41_state_renewal_ops, | 8530 | .state_renewal_ops = &nfs41_state_renewal_ops, |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5194933ed419..5ad908e9ce9c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -1003,11 +1003,11 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_m | |||
1003 | struct nfs_seqid *new; | 1003 | struct nfs_seqid *new; |
1004 | 1004 | ||
1005 | new = kmalloc(sizeof(*new), gfp_mask); | 1005 | new = kmalloc(sizeof(*new), gfp_mask); |
1006 | if (new != NULL) { | 1006 | if (new == NULL) |
1007 | new->sequence = counter; | 1007 | return ERR_PTR(-ENOMEM); |
1008 | INIT_LIST_HEAD(&new->list); | 1008 | new->sequence = counter; |
1009 | new->task = NULL; | 1009 | INIT_LIST_HEAD(&new->list); |
1010 | } | 1010 | new->task = NULL; |
1011 | return new; | 1011 | return new; |
1012 | } | 1012 | } |
1013 | 1013 | ||
@@ -1015,7 +1015,7 @@ void nfs_release_seqid(struct nfs_seqid *seqid) | |||
1015 | { | 1015 | { |
1016 | struct nfs_seqid_counter *sequence; | 1016 | struct nfs_seqid_counter *sequence; |
1017 | 1017 | ||
1018 | if (list_empty(&seqid->list)) | 1018 | if (seqid == NULL || list_empty(&seqid->list)) |
1019 | return; | 1019 | return; |
1020 | sequence = seqid->sequence; | 1020 | sequence = seqid->sequence; |
1021 | spin_lock(&sequence->lock); | 1021 | spin_lock(&sequence->lock); |
@@ -1071,13 +1071,15 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) | |||
1071 | 1071 | ||
1072 | void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) | 1072 | void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) |
1073 | { | 1073 | { |
1074 | struct nfs4_state_owner *sp = container_of(seqid->sequence, | 1074 | struct nfs4_state_owner *sp; |
1075 | struct nfs4_state_owner, so_seqid); | 1075 | |
1076 | struct nfs_server *server = sp->so_server; | 1076 | if (seqid == NULL) |
1077 | return; | ||
1077 | 1078 | ||
1079 | sp = container_of(seqid->sequence, struct nfs4_state_owner, so_seqid); | ||
1078 | if (status == -NFS4ERR_BAD_SEQID) | 1080 | if (status == -NFS4ERR_BAD_SEQID) |
1079 | nfs4_drop_state_owner(sp); | 1081 | nfs4_drop_state_owner(sp); |
1080 | if (!nfs4_has_session(server->nfs_client)) | 1082 | if (!nfs4_has_session(sp->so_server->nfs_client)) |
1081 | nfs_increment_seqid(status, seqid); | 1083 | nfs_increment_seqid(status, seqid); |
1082 | } | 1084 | } |
1083 | 1085 | ||
@@ -1088,14 +1090,18 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) | |||
1088 | */ | 1090 | */ |
1089 | void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) | 1091 | void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) |
1090 | { | 1092 | { |
1091 | nfs_increment_seqid(status, seqid); | 1093 | if (seqid != NULL) |
1094 | nfs_increment_seqid(status, seqid); | ||
1092 | } | 1095 | } |
1093 | 1096 | ||
1094 | int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) | 1097 | int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) |
1095 | { | 1098 | { |
1096 | struct nfs_seqid_counter *sequence = seqid->sequence; | 1099 | struct nfs_seqid_counter *sequence; |
1097 | int status = 0; | 1100 | int status = 0; |
1098 | 1101 | ||
1102 | if (seqid == NULL) | ||
1103 | goto out; | ||
1104 | sequence = seqid->sequence; | ||
1099 | spin_lock(&sequence->lock); | 1105 | spin_lock(&sequence->lock); |
1100 | seqid->task = task; | 1106 | seqid->task = task; |
1101 | if (list_empty(&seqid->list)) | 1107 | if (list_empty(&seqid->list)) |
@@ -1106,6 +1112,7 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) | |||
1106 | status = -EAGAIN; | 1112 | status = -EAGAIN; |
1107 | unlock: | 1113 | unlock: |
1108 | spin_unlock(&sequence->lock); | 1114 | spin_unlock(&sequence->lock); |
1115 | out: | ||
1109 | return status; | 1116 | return status; |
1110 | } | 1117 | } |
1111 | 1118 | ||
@@ -1366,49 +1373,55 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ | |||
1366 | struct nfs_inode *nfsi = NFS_I(inode); | 1373 | struct nfs_inode *nfsi = NFS_I(inode); |
1367 | struct file_lock *fl; | 1374 | struct file_lock *fl; |
1368 | int status = 0; | 1375 | int status = 0; |
1376 | struct file_lock_context *flctx = inode->i_flctx; | ||
1377 | struct list_head *list; | ||
1369 | 1378 | ||
1370 | if (inode->i_flock == NULL) | 1379 | if (flctx == NULL) |
1371 | return 0; | 1380 | return 0; |
1372 | 1381 | ||
1382 | list = &flctx->flc_posix; | ||
1383 | |||
1373 | /* Guard against delegation returns and new lock/unlock calls */ | 1384 | /* Guard against delegation returns and new lock/unlock calls */ |
1374 | down_write(&nfsi->rwsem); | 1385 | down_write(&nfsi->rwsem); |
1375 | /* Protect inode->i_flock using the BKL */ | 1386 | spin_lock(&flctx->flc_lock); |
1376 | spin_lock(&inode->i_lock); | 1387 | restart: |
1377 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 1388 | list_for_each_entry(fl, list, fl_list) { |
1378 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) | ||
1379 | continue; | ||
1380 | if (nfs_file_open_context(fl->fl_file)->state != state) | 1389 | if (nfs_file_open_context(fl->fl_file)->state != state) |
1381 | continue; | 1390 | continue; |
1382 | spin_unlock(&inode->i_lock); | 1391 | spin_unlock(&flctx->flc_lock); |
1383 | status = ops->recover_lock(state, fl); | 1392 | status = ops->recover_lock(state, fl); |
1384 | switch (status) { | 1393 | switch (status) { |
1385 | case 0: | 1394 | case 0: |
1386 | break; | 1395 | break; |
1387 | case -ESTALE: | 1396 | case -ESTALE: |
1388 | case -NFS4ERR_ADMIN_REVOKED: | 1397 | case -NFS4ERR_ADMIN_REVOKED: |
1389 | case -NFS4ERR_STALE_STATEID: | 1398 | case -NFS4ERR_STALE_STATEID: |
1390 | case -NFS4ERR_BAD_STATEID: | 1399 | case -NFS4ERR_BAD_STATEID: |
1391 | case -NFS4ERR_EXPIRED: | 1400 | case -NFS4ERR_EXPIRED: |
1392 | case -NFS4ERR_NO_GRACE: | 1401 | case -NFS4ERR_NO_GRACE: |
1393 | case -NFS4ERR_STALE_CLIENTID: | 1402 | case -NFS4ERR_STALE_CLIENTID: |
1394 | case -NFS4ERR_BADSESSION: | 1403 | case -NFS4ERR_BADSESSION: |
1395 | case -NFS4ERR_BADSLOT: | 1404 | case -NFS4ERR_BADSLOT: |
1396 | case -NFS4ERR_BAD_HIGH_SLOT: | 1405 | case -NFS4ERR_BAD_HIGH_SLOT: |
1397 | case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: | 1406 | case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: |
1398 | goto out; | 1407 | goto out; |
1399 | default: | 1408 | default: |
1400 | printk(KERN_ERR "NFS: %s: unhandled error %d\n", | 1409 | pr_err("NFS: %s: unhandled error %d\n", |
1401 | __func__, status); | 1410 | __func__, status); |
1402 | case -ENOMEM: | 1411 | case -ENOMEM: |
1403 | case -NFS4ERR_DENIED: | 1412 | case -NFS4ERR_DENIED: |
1404 | case -NFS4ERR_RECLAIM_BAD: | 1413 | case -NFS4ERR_RECLAIM_BAD: |
1405 | case -NFS4ERR_RECLAIM_CONFLICT: | 1414 | case -NFS4ERR_RECLAIM_CONFLICT: |
1406 | /* kill_proc(fl->fl_pid, SIGLOST, 1); */ | 1415 | /* kill_proc(fl->fl_pid, SIGLOST, 1); */ |
1407 | status = 0; | 1416 | status = 0; |
1408 | } | 1417 | } |
1409 | spin_lock(&inode->i_lock); | 1418 | spin_lock(&flctx->flc_lock); |
1410 | } | 1419 | } |
1411 | spin_unlock(&inode->i_lock); | 1420 | if (list == &flctx->flc_posix) { |
1421 | list = &flctx->flc_flock; | ||
1422 | goto restart; | ||
1423 | } | ||
1424 | spin_unlock(&flctx->flc_lock); | ||
1412 | out: | 1425 | out: |
1413 | up_write(&nfsi->rwsem); | 1426 | up_write(&nfsi->rwsem); |
1414 | return status; | 1427 | return status; |
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 6f340f02f2ba..75090feeafad 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c | |||
@@ -53,7 +53,6 @@ static const struct super_operations nfs4_sops = { | |||
53 | .destroy_inode = nfs_destroy_inode, | 53 | .destroy_inode = nfs_destroy_inode, |
54 | .write_inode = nfs4_write_inode, | 54 | .write_inode = nfs4_write_inode, |
55 | .drop_inode = nfs_drop_inode, | 55 | .drop_inode = nfs_drop_inode, |
56 | .put_super = nfs_put_super, | ||
57 | .statfs = nfs_statfs, | 56 | .statfs = nfs_statfs, |
58 | .evict_inode = nfs4_evict_inode, | 57 | .evict_inode = nfs4_evict_inode, |
59 | .umount_begin = nfs_umount_begin, | 58 | .umount_begin = nfs_umount_begin, |
@@ -346,6 +345,9 @@ out: | |||
346 | 345 | ||
347 | static void __exit exit_nfs_v4(void) | 346 | static void __exit exit_nfs_v4(void) |
348 | { | 347 | { |
348 | /* Not called in the _init(), conditionally loaded */ | ||
349 | nfs4_pnfs_v3_ds_connect_unload(); | ||
350 | |||
349 | unregister_nfs_version(&nfs_v4); | 351 | unregister_nfs_version(&nfs_v4); |
350 | nfs4_unregister_sysctl(); | 352 | nfs4_unregister_sysctl(); |
351 | nfs_idmap_quit(); | 353 | nfs_idmap_quit(); |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index cb4376b78ed9..e23a0a664e12 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -946,7 +946,10 @@ static void encode_uint64(struct xdr_stream *xdr, u64 n) | |||
946 | static void encode_nfs4_seqid(struct xdr_stream *xdr, | 946 | static void encode_nfs4_seqid(struct xdr_stream *xdr, |
947 | const struct nfs_seqid *seqid) | 947 | const struct nfs_seqid *seqid) |
948 | { | 948 | { |
949 | encode_uint32(xdr, seqid->sequence->counter); | 949 | if (seqid != NULL) |
950 | encode_uint32(xdr, seqid->sequence->counter); | ||
951 | else | ||
952 | encode_uint32(xdr, 0); | ||
950 | } | 953 | } |
951 | 954 | ||
952 | static void encode_compound_hdr(struct xdr_stream *xdr, | 955 | static void encode_compound_hdr(struct xdr_stream *xdr, |
@@ -1125,7 +1128,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg | |||
1125 | { | 1128 | { |
1126 | encode_op_hdr(xdr, OP_CLOSE, decode_close_maxsz, hdr); | 1129 | encode_op_hdr(xdr, OP_CLOSE, decode_close_maxsz, hdr); |
1127 | encode_nfs4_seqid(xdr, arg->seqid); | 1130 | encode_nfs4_seqid(xdr, arg->seqid); |
1128 | encode_nfs4_stateid(xdr, arg->stateid); | 1131 | encode_nfs4_stateid(xdr, &arg->stateid); |
1129 | } | 1132 | } |
1130 | 1133 | ||
1131 | static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr) | 1134 | static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr) |
@@ -1301,12 +1304,12 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args | |||
1301 | *p = cpu_to_be32(args->new_lock_owner); | 1304 | *p = cpu_to_be32(args->new_lock_owner); |
1302 | if (args->new_lock_owner){ | 1305 | if (args->new_lock_owner){ |
1303 | encode_nfs4_seqid(xdr, args->open_seqid); | 1306 | encode_nfs4_seqid(xdr, args->open_seqid); |
1304 | encode_nfs4_stateid(xdr, args->open_stateid); | 1307 | encode_nfs4_stateid(xdr, &args->open_stateid); |
1305 | encode_nfs4_seqid(xdr, args->lock_seqid); | 1308 | encode_nfs4_seqid(xdr, args->lock_seqid); |
1306 | encode_lockowner(xdr, &args->lock_owner); | 1309 | encode_lockowner(xdr, &args->lock_owner); |
1307 | } | 1310 | } |
1308 | else { | 1311 | else { |
1309 | encode_nfs4_stateid(xdr, args->lock_stateid); | 1312 | encode_nfs4_stateid(xdr, &args->lock_stateid); |
1310 | encode_nfs4_seqid(xdr, args->lock_seqid); | 1313 | encode_nfs4_seqid(xdr, args->lock_seqid); |
1311 | } | 1314 | } |
1312 | } | 1315 | } |
@@ -1330,7 +1333,7 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar | |||
1330 | encode_op_hdr(xdr, OP_LOCKU, decode_locku_maxsz, hdr); | 1333 | encode_op_hdr(xdr, OP_LOCKU, decode_locku_maxsz, hdr); |
1331 | encode_uint32(xdr, nfs4_lock_type(args->fl, 0)); | 1334 | encode_uint32(xdr, nfs4_lock_type(args->fl, 0)); |
1332 | encode_nfs4_seqid(xdr, args->seqid); | 1335 | encode_nfs4_seqid(xdr, args->seqid); |
1333 | encode_nfs4_stateid(xdr, args->stateid); | 1336 | encode_nfs4_stateid(xdr, &args->stateid); |
1334 | p = reserve_space(xdr, 16); | 1337 | p = reserve_space(xdr, 16); |
1335 | p = xdr_encode_hyper(p, args->fl->fl_start); | 1338 | p = xdr_encode_hyper(p, args->fl->fl_start); |
1336 | xdr_encode_hyper(p, nfs4_lock_length(args->fl)); | 1339 | xdr_encode_hyper(p, nfs4_lock_length(args->fl)); |
@@ -1348,24 +1351,12 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc | |||
1348 | encode_string(xdr, name->len, name->name); | 1351 | encode_string(xdr, name->len, name->name); |
1349 | } | 1352 | } |
1350 | 1353 | ||
1351 | static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) | 1354 | static void encode_share_access(struct xdr_stream *xdr, u32 share_access) |
1352 | { | 1355 | { |
1353 | __be32 *p; | 1356 | __be32 *p; |
1354 | 1357 | ||
1355 | p = reserve_space(xdr, 8); | 1358 | p = reserve_space(xdr, 8); |
1356 | switch (fmode & (FMODE_READ|FMODE_WRITE)) { | 1359 | *p++ = cpu_to_be32(share_access); |
1357 | case FMODE_READ: | ||
1358 | *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_READ); | ||
1359 | break; | ||
1360 | case FMODE_WRITE: | ||
1361 | *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_WRITE); | ||
1362 | break; | ||
1363 | case FMODE_READ|FMODE_WRITE: | ||
1364 | *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_BOTH); | ||
1365 | break; | ||
1366 | default: | ||
1367 | *p++ = cpu_to_be32(0); | ||
1368 | } | ||
1369 | *p = cpu_to_be32(0); /* for linux, share_deny = 0 always */ | 1360 | *p = cpu_to_be32(0); /* for linux, share_deny = 0 always */ |
1370 | } | 1361 | } |
1371 | 1362 | ||
@@ -1377,7 +1368,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena | |||
1377 | * owner 4 = 32 | 1368 | * owner 4 = 32 |
1378 | */ | 1369 | */ |
1379 | encode_nfs4_seqid(xdr, arg->seqid); | 1370 | encode_nfs4_seqid(xdr, arg->seqid); |
1380 | encode_share_access(xdr, arg->fmode); | 1371 | encode_share_access(xdr, arg->share_access); |
1381 | p = reserve_space(xdr, 36); | 1372 | p = reserve_space(xdr, 36); |
1382 | p = xdr_encode_hyper(p, arg->clientid); | 1373 | p = xdr_encode_hyper(p, arg->clientid); |
1383 | *p++ = cpu_to_be32(24); | 1374 | *p++ = cpu_to_be32(24); |
@@ -1530,9 +1521,9 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co | |||
1530 | static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) | 1521 | static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) |
1531 | { | 1522 | { |
1532 | encode_op_hdr(xdr, OP_OPEN_DOWNGRADE, decode_open_downgrade_maxsz, hdr); | 1523 | encode_op_hdr(xdr, OP_OPEN_DOWNGRADE, decode_open_downgrade_maxsz, hdr); |
1533 | encode_nfs4_stateid(xdr, arg->stateid); | 1524 | encode_nfs4_stateid(xdr, &arg->stateid); |
1534 | encode_nfs4_seqid(xdr, arg->seqid); | 1525 | encode_nfs4_seqid(xdr, arg->seqid); |
1535 | encode_share_access(xdr, arg->fmode); | 1526 | encode_share_access(xdr, arg->share_access); |
1536 | } | 1527 | } |
1537 | 1528 | ||
1538 | static void | 1529 | static void |
@@ -1801,9 +1792,8 @@ static void encode_create_session(struct xdr_stream *xdr, | |||
1801 | struct compound_hdr *hdr) | 1792 | struct compound_hdr *hdr) |
1802 | { | 1793 | { |
1803 | __be32 *p; | 1794 | __be32 *p; |
1804 | char machine_name[NFS4_MAX_MACHINE_NAME_LEN]; | ||
1805 | uint32_t len; | ||
1806 | struct nfs_client *clp = args->client; | 1795 | struct nfs_client *clp = args->client; |
1796 | struct rpc_clnt *clnt = clp->cl_rpcclient; | ||
1807 | struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); | 1797 | struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); |
1808 | u32 max_resp_sz_cached; | 1798 | u32 max_resp_sz_cached; |
1809 | 1799 | ||
@@ -1814,11 +1804,8 @@ static void encode_create_session(struct xdr_stream *xdr, | |||
1814 | max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE + | 1804 | max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE + |
1815 | RPC_MAX_AUTH_SIZE + 2) * XDR_UNIT; | 1805 | RPC_MAX_AUTH_SIZE + 2) * XDR_UNIT; |
1816 | 1806 | ||
1817 | len = scnprintf(machine_name, sizeof(machine_name), "%s", | ||
1818 | clp->cl_ipaddr); | ||
1819 | |||
1820 | encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr); | 1807 | encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr); |
1821 | p = reserve_space(xdr, 16 + 2*28 + 20 + len + 12); | 1808 | p = reserve_space(xdr, 16 + 2*28 + 20 + clnt->cl_nodelen + 12); |
1822 | p = xdr_encode_hyper(p, clp->cl_clientid); | 1809 | p = xdr_encode_hyper(p, clp->cl_clientid); |
1823 | *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ | 1810 | *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ |
1824 | *p++ = cpu_to_be32(args->flags); /*flags */ | 1811 | *p++ = cpu_to_be32(args->flags); /*flags */ |
@@ -1847,7 +1834,7 @@ static void encode_create_session(struct xdr_stream *xdr, | |||
1847 | 1834 | ||
1848 | /* authsys_parms rfc1831 */ | 1835 | /* authsys_parms rfc1831 */ |
1849 | *p++ = cpu_to_be32(nn->boot_time.tv_nsec); /* stamp */ | 1836 | *p++ = cpu_to_be32(nn->boot_time.tv_nsec); /* stamp */ |
1850 | p = xdr_encode_opaque(p, machine_name, len); | 1837 | p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); |
1851 | *p++ = cpu_to_be32(0); /* UID */ | 1838 | *p++ = cpu_to_be32(0); /* UID */ |
1852 | *p++ = cpu_to_be32(0); /* GID */ | 1839 | *p++ = cpu_to_be32(0); /* GID */ |
1853 | *p = cpu_to_be32(0); /* No more gids */ | 1840 | *p = cpu_to_be32(0); /* No more gids */ |
@@ -2012,11 +1999,11 @@ encode_layoutreturn(struct xdr_stream *xdr, | |||
2012 | p = reserve_space(xdr, 16); | 1999 | p = reserve_space(xdr, 16); |
2013 | *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ | 2000 | *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ |
2014 | *p++ = cpu_to_be32(args->layout_type); | 2001 | *p++ = cpu_to_be32(args->layout_type); |
2015 | *p++ = cpu_to_be32(IOMODE_ANY); | 2002 | *p++ = cpu_to_be32(args->range.iomode); |
2016 | *p = cpu_to_be32(RETURN_FILE); | 2003 | *p = cpu_to_be32(RETURN_FILE); |
2017 | p = reserve_space(xdr, 16); | 2004 | p = reserve_space(xdr, 16); |
2018 | p = xdr_encode_hyper(p, 0); | 2005 | p = xdr_encode_hyper(p, args->range.offset); |
2019 | p = xdr_encode_hyper(p, NFS4_MAX_UINT64); | 2006 | p = xdr_encode_hyper(p, args->range.length); |
2020 | spin_lock(&args->inode->i_lock); | 2007 | spin_lock(&args->inode->i_lock); |
2021 | encode_nfs4_stateid(xdr, &args->stateid); | 2008 | encode_nfs4_stateid(xdr, &args->stateid); |
2022 | spin_unlock(&args->inode->i_lock); | 2009 | spin_unlock(&args->inode->i_lock); |
@@ -4936,20 +4923,13 @@ out_overflow: | |||
4936 | return -EIO; | 4923 | return -EIO; |
4937 | } | 4924 | } |
4938 | 4925 | ||
4939 | static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) | 4926 | static int decode_rw_delegation(struct xdr_stream *xdr, |
4927 | uint32_t delegation_type, | ||
4928 | struct nfs_openres *res) | ||
4940 | { | 4929 | { |
4941 | __be32 *p; | 4930 | __be32 *p; |
4942 | uint32_t delegation_type; | ||
4943 | int status; | 4931 | int status; |
4944 | 4932 | ||
4945 | p = xdr_inline_decode(xdr, 4); | ||
4946 | if (unlikely(!p)) | ||
4947 | goto out_overflow; | ||
4948 | delegation_type = be32_to_cpup(p); | ||
4949 | if (delegation_type == NFS4_OPEN_DELEGATE_NONE) { | ||
4950 | res->delegation_type = 0; | ||
4951 | return 0; | ||
4952 | } | ||
4953 | status = decode_stateid(xdr, &res->delegation); | 4933 | status = decode_stateid(xdr, &res->delegation); |
4954 | if (unlikely(status)) | 4934 | if (unlikely(status)) |
4955 | return status; | 4935 | return status; |
@@ -4973,6 +4953,52 @@ out_overflow: | |||
4973 | return -EIO; | 4953 | return -EIO; |
4974 | } | 4954 | } |
4975 | 4955 | ||
4956 | static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res) | ||
4957 | { | ||
4958 | __be32 *p; | ||
4959 | uint32_t why_no_delegation; | ||
4960 | |||
4961 | p = xdr_inline_decode(xdr, 4); | ||
4962 | if (unlikely(!p)) | ||
4963 | goto out_overflow; | ||
4964 | why_no_delegation = be32_to_cpup(p); | ||
4965 | switch (why_no_delegation) { | ||
4966 | case WND4_CONTENTION: | ||
4967 | case WND4_RESOURCE: | ||
4968 | xdr_inline_decode(xdr, 4); | ||
4969 | /* Ignore for now */ | ||
4970 | } | ||
4971 | return 0; | ||
4972 | out_overflow: | ||
4973 | print_overflow_msg(__func__, xdr); | ||
4974 | return -EIO; | ||
4975 | } | ||
4976 | |||
4977 | static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) | ||
4978 | { | ||
4979 | __be32 *p; | ||
4980 | uint32_t delegation_type; | ||
4981 | |||
4982 | p = xdr_inline_decode(xdr, 4); | ||
4983 | if (unlikely(!p)) | ||
4984 | goto out_overflow; | ||
4985 | delegation_type = be32_to_cpup(p); | ||
4986 | res->delegation_type = 0; | ||
4987 | switch (delegation_type) { | ||
4988 | case NFS4_OPEN_DELEGATE_NONE: | ||
4989 | return 0; | ||
4990 | case NFS4_OPEN_DELEGATE_READ: | ||
4991 | case NFS4_OPEN_DELEGATE_WRITE: | ||
4992 | return decode_rw_delegation(xdr, delegation_type, res); | ||
4993 | case NFS4_OPEN_DELEGATE_NONE_EXT: | ||
4994 | return decode_no_delegation(xdr, res); | ||
4995 | } | ||
4996 | return -EIO; | ||
4997 | out_overflow: | ||
4998 | print_overflow_msg(__func__, xdr); | ||
4999 | return -EIO; | ||
5000 | } | ||
5001 | |||
4976 | static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) | 5002 | static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) |
4977 | { | 5003 | { |
4978 | __be32 *p; | 5004 | __be32 *p; |
@@ -6567,6 +6593,7 @@ static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
6567 | int status; | 6593 | int status; |
6568 | 6594 | ||
6569 | status = decode_compound_hdr(xdr, &hdr); | 6595 | status = decode_compound_hdr(xdr, &hdr); |
6596 | res->op_status = hdr.status; | ||
6570 | if (status) | 6597 | if (status) |
6571 | goto out; | 6598 | goto out; |
6572 | status = decode_sequence(xdr, &res->seq_res, rqstp); | 6599 | status = decode_sequence(xdr, &res->seq_res, rqstp); |
@@ -6592,6 +6619,7 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
6592 | int status; | 6619 | int status; |
6593 | 6620 | ||
6594 | status = decode_compound_hdr(xdr, &hdr); | 6621 | status = decode_compound_hdr(xdr, &hdr); |
6622 | res->op_status = hdr.status; | ||
6595 | if (status) | 6623 | if (status) |
6596 | goto out; | 6624 | goto out; |
6597 | status = decode_sequence(xdr, &res->seq_res, rqstp); | 6625 | status = decode_sequence(xdr, &res->seq_res, rqstp); |
@@ -6621,6 +6649,7 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
6621 | int status; | 6649 | int status; |
6622 | 6650 | ||
6623 | status = decode_compound_hdr(xdr, &hdr); | 6651 | status = decode_compound_hdr(xdr, &hdr); |
6652 | res->op_status = hdr.status; | ||
6624 | if (status) | 6653 | if (status) |
6625 | goto out; | 6654 | goto out; |
6626 | status = decode_sequence(xdr, &res->seq_res, rqstp); | 6655 | status = decode_sequence(xdr, &res->seq_res, rqstp); |
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index cd3c910d2d12..9bc9f04fb7f6 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c | |||
@@ -261,11 +261,11 @@ static int __init root_nfs_data(char *cmdline) | |||
261 | */ | 261 | */ |
262 | len = snprintf(nfs_export_path, sizeof(nfs_export_path), | 262 | len = snprintf(nfs_export_path, sizeof(nfs_export_path), |
263 | tmp, utsname()->nodename); | 263 | tmp, utsname()->nodename); |
264 | if (len > (int)sizeof(nfs_export_path)) | 264 | if (len >= (int)sizeof(nfs_export_path)) |
265 | goto out_devnametoolong; | 265 | goto out_devnametoolong; |
266 | len = snprintf(nfs_root_device, sizeof(nfs_root_device), | 266 | len = snprintf(nfs_root_device, sizeof(nfs_root_device), |
267 | "%pI4:%s", &servaddr, nfs_export_path); | 267 | "%pI4:%s", &servaddr, nfs_export_path); |
268 | if (len > (int)sizeof(nfs_root_device)) | 268 | if (len >= (int)sizeof(nfs_root_device)) |
269 | goto out_devnametoolong; | 269 | goto out_devnametoolong; |
270 | 270 | ||
271 | retval = 0; | 271 | retval = 0; |
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 9e5bc42180e4..24e1d7403c0b 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -537,11 +537,12 @@ int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) | |||
537 | static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, | 537 | static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, |
538 | struct nfs_page *prev, struct nfs_page *req) | 538 | struct nfs_page *prev, struct nfs_page *req) |
539 | { | 539 | { |
540 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(pgio); | ||
540 | unsigned int size; | 541 | unsigned int size; |
541 | 542 | ||
542 | size = pnfs_generic_pg_test(pgio, prev, req); | 543 | size = pnfs_generic_pg_test(pgio, prev, req); |
543 | 544 | ||
544 | if (!size || pgio->pg_count + req->wb_bytes > | 545 | if (!size || mirror->pg_count + req->wb_bytes > |
545 | (unsigned long)pgio->pg_layout_private) | 546 | (unsigned long)pgio->pg_layout_private) |
546 | return 0; | 547 | return 0; |
547 | 548 | ||
@@ -607,12 +608,14 @@ static const struct nfs_pageio_ops objio_pg_read_ops = { | |||
607 | .pg_init = objio_init_read, | 608 | .pg_init = objio_init_read, |
608 | .pg_test = objio_pg_test, | 609 | .pg_test = objio_pg_test, |
609 | .pg_doio = pnfs_generic_pg_readpages, | 610 | .pg_doio = pnfs_generic_pg_readpages, |
611 | .pg_cleanup = pnfs_generic_pg_cleanup, | ||
610 | }; | 612 | }; |
611 | 613 | ||
612 | static const struct nfs_pageio_ops objio_pg_write_ops = { | 614 | static const struct nfs_pageio_ops objio_pg_write_ops = { |
613 | .pg_init = objio_init_write, | 615 | .pg_init = objio_init_write, |
614 | .pg_test = objio_pg_test, | 616 | .pg_test = objio_pg_test, |
615 | .pg_doio = pnfs_generic_pg_writepages, | 617 | .pg_doio = pnfs_generic_pg_writepages, |
618 | .pg_cleanup = pnfs_generic_pg_cleanup, | ||
616 | }; | 619 | }; |
617 | 620 | ||
618 | static struct pnfs_layoutdriver_type objlayout_type = { | 621 | static struct pnfs_layoutdriver_type objlayout_type = { |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 2b5e769beb16..d57190a0d533 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -42,21 +42,35 @@ static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) | |||
42 | return p->pagevec != NULL; | 42 | return p->pagevec != NULL; |
43 | } | 43 | } |
44 | 44 | ||
45 | struct nfs_pgio_mirror * | ||
46 | nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc) | ||
47 | { | ||
48 | return nfs_pgio_has_mirroring(desc) ? | ||
49 | &desc->pg_mirrors[desc->pg_mirror_idx] : | ||
50 | &desc->pg_mirrors[0]; | ||
51 | } | ||
52 | EXPORT_SYMBOL_GPL(nfs_pgio_current_mirror); | ||
53 | |||
45 | void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, | 54 | void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, |
46 | struct nfs_pgio_header *hdr, | 55 | struct nfs_pgio_header *hdr, |
47 | void (*release)(struct nfs_pgio_header *hdr)) | 56 | void (*release)(struct nfs_pgio_header *hdr)) |
48 | { | 57 | { |
49 | hdr->req = nfs_list_entry(desc->pg_list.next); | 58 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); |
59 | |||
60 | |||
61 | hdr->req = nfs_list_entry(mirror->pg_list.next); | ||
50 | hdr->inode = desc->pg_inode; | 62 | hdr->inode = desc->pg_inode; |
51 | hdr->cred = hdr->req->wb_context->cred; | 63 | hdr->cred = hdr->req->wb_context->cred; |
52 | hdr->io_start = req_offset(hdr->req); | 64 | hdr->io_start = req_offset(hdr->req); |
53 | hdr->good_bytes = desc->pg_count; | 65 | hdr->good_bytes = mirror->pg_count; |
54 | hdr->dreq = desc->pg_dreq; | 66 | hdr->dreq = desc->pg_dreq; |
55 | hdr->layout_private = desc->pg_layout_private; | 67 | hdr->layout_private = desc->pg_layout_private; |
56 | hdr->release = release; | 68 | hdr->release = release; |
57 | hdr->completion_ops = desc->pg_completion_ops; | 69 | hdr->completion_ops = desc->pg_completion_ops; |
58 | if (hdr->completion_ops->init_hdr) | 70 | if (hdr->completion_ops->init_hdr) |
59 | hdr->completion_ops->init_hdr(hdr); | 71 | hdr->completion_ops->init_hdr(hdr); |
72 | |||
73 | hdr->pgio_mirror_idx = desc->pg_mirror_idx; | ||
60 | } | 74 | } |
61 | EXPORT_SYMBOL_GPL(nfs_pgheader_init); | 75 | EXPORT_SYMBOL_GPL(nfs_pgheader_init); |
62 | 76 | ||
@@ -480,7 +494,10 @@ nfs_wait_on_request(struct nfs_page *req) | |||
480 | size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, | 494 | size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, |
481 | struct nfs_page *prev, struct nfs_page *req) | 495 | struct nfs_page *prev, struct nfs_page *req) |
482 | { | 496 | { |
483 | if (desc->pg_count > desc->pg_bsize) { | 497 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); |
498 | |||
499 | |||
500 | if (mirror->pg_count > mirror->pg_bsize) { | ||
484 | /* should never happen */ | 501 | /* should never happen */ |
485 | WARN_ON_ONCE(1); | 502 | WARN_ON_ONCE(1); |
486 | return 0; | 503 | return 0; |
@@ -490,11 +507,11 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, | |||
490 | * Limit the request size so that we can still allocate a page array | 507 | * Limit the request size so that we can still allocate a page array |
491 | * for it without upsetting the slab allocator. | 508 | * for it without upsetting the slab allocator. |
492 | */ | 509 | */ |
493 | if (((desc->pg_count + req->wb_bytes) >> PAGE_SHIFT) * | 510 | if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * |
494 | sizeof(struct page) > PAGE_SIZE) | 511 | sizeof(struct page) > PAGE_SIZE) |
495 | return 0; | 512 | return 0; |
496 | 513 | ||
497 | return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); | 514 | return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); |
498 | } | 515 | } |
499 | EXPORT_SYMBOL_GPL(nfs_generic_pg_test); | 516 | EXPORT_SYMBOL_GPL(nfs_generic_pg_test); |
500 | 517 | ||
@@ -597,13 +614,14 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) | |||
597 | } | 614 | } |
598 | 615 | ||
599 | int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, | 616 | int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, |
617 | struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops, | ||
600 | const struct rpc_call_ops *call_ops, int how, int flags) | 618 | const struct rpc_call_ops *call_ops, int how, int flags) |
601 | { | 619 | { |
602 | struct rpc_task *task; | 620 | struct rpc_task *task; |
603 | struct rpc_message msg = { | 621 | struct rpc_message msg = { |
604 | .rpc_argp = &hdr->args, | 622 | .rpc_argp = &hdr->args, |
605 | .rpc_resp = &hdr->res, | 623 | .rpc_resp = &hdr->res, |
606 | .rpc_cred = hdr->cred, | 624 | .rpc_cred = cred, |
607 | }; | 625 | }; |
608 | struct rpc_task_setup task_setup_data = { | 626 | struct rpc_task_setup task_setup_data = { |
609 | .rpc_client = clnt, | 627 | .rpc_client = clnt, |
@@ -616,7 +634,7 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, | |||
616 | }; | 634 | }; |
617 | int ret = 0; | 635 | int ret = 0; |
618 | 636 | ||
619 | hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how); | 637 | hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); |
620 | 638 | ||
621 | dprintk("NFS: %5u initiated pgio call " | 639 | dprintk("NFS: %5u initiated pgio call " |
622 | "(req %s/%llu, %u bytes @ offset %llu)\n", | 640 | "(req %s/%llu, %u bytes @ offset %llu)\n", |
@@ -650,10 +668,18 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); | |||
650 | static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, | 668 | static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, |
651 | struct nfs_pgio_header *hdr) | 669 | struct nfs_pgio_header *hdr) |
652 | { | 670 | { |
671 | struct nfs_pgio_mirror *mirror; | ||
672 | u32 midx; | ||
673 | |||
653 | set_bit(NFS_IOHDR_REDO, &hdr->flags); | 674 | set_bit(NFS_IOHDR_REDO, &hdr->flags); |
654 | nfs_pgio_data_destroy(hdr); | 675 | nfs_pgio_data_destroy(hdr); |
655 | hdr->completion_ops->completion(hdr); | 676 | hdr->completion_ops->completion(hdr); |
656 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | 677 | /* TODO: Make sure it's right to clean up all mirrors here |
678 | * and not just hdr->pgio_mirror_idx */ | ||
679 | for (midx = 0; midx < desc->pg_mirror_count; midx++) { | ||
680 | mirror = &desc->pg_mirrors[midx]; | ||
681 | desc->pg_completion_ops->error_cleanup(&mirror->pg_list); | ||
682 | } | ||
657 | return -ENOMEM; | 683 | return -ENOMEM; |
658 | } | 684 | } |
659 | 685 | ||
@@ -670,6 +696,17 @@ static void nfs_pgio_release(void *calldata) | |||
670 | hdr->completion_ops->completion(hdr); | 696 | hdr->completion_ops->completion(hdr); |
671 | } | 697 | } |
672 | 698 | ||
699 | static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, | ||
700 | unsigned int bsize) | ||
701 | { | ||
702 | INIT_LIST_HEAD(&mirror->pg_list); | ||
703 | mirror->pg_bytes_written = 0; | ||
704 | mirror->pg_count = 0; | ||
705 | mirror->pg_bsize = bsize; | ||
706 | mirror->pg_base = 0; | ||
707 | mirror->pg_recoalesce = 0; | ||
708 | } | ||
709 | |||
673 | /** | 710 | /** |
674 | * nfs_pageio_init - initialise a page io descriptor | 711 | * nfs_pageio_init - initialise a page io descriptor |
675 | * @desc: pointer to descriptor | 712 | * @desc: pointer to descriptor |
@@ -686,13 +723,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
686 | size_t bsize, | 723 | size_t bsize, |
687 | int io_flags) | 724 | int io_flags) |
688 | { | 725 | { |
689 | INIT_LIST_HEAD(&desc->pg_list); | 726 | struct nfs_pgio_mirror *new; |
690 | desc->pg_bytes_written = 0; | 727 | int i; |
691 | desc->pg_count = 0; | 728 | |
692 | desc->pg_bsize = bsize; | ||
693 | desc->pg_base = 0; | ||
694 | desc->pg_moreio = 0; | 729 | desc->pg_moreio = 0; |
695 | desc->pg_recoalesce = 0; | ||
696 | desc->pg_inode = inode; | 730 | desc->pg_inode = inode; |
697 | desc->pg_ops = pg_ops; | 731 | desc->pg_ops = pg_ops; |
698 | desc->pg_completion_ops = compl_ops; | 732 | desc->pg_completion_ops = compl_ops; |
@@ -702,6 +736,26 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
702 | desc->pg_lseg = NULL; | 736 | desc->pg_lseg = NULL; |
703 | desc->pg_dreq = NULL; | 737 | desc->pg_dreq = NULL; |
704 | desc->pg_layout_private = NULL; | 738 | desc->pg_layout_private = NULL; |
739 | desc->pg_bsize = bsize; | ||
740 | |||
741 | desc->pg_mirror_count = 1; | ||
742 | desc->pg_mirror_idx = 0; | ||
743 | |||
744 | if (pg_ops->pg_get_mirror_count) { | ||
745 | /* until we have a request, we don't have an lseg and no | ||
746 | * idea how many mirrors there will be */ | ||
747 | new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX, | ||
748 | sizeof(struct nfs_pgio_mirror), GFP_KERNEL); | ||
749 | desc->pg_mirrors_dynamic = new; | ||
750 | desc->pg_mirrors = new; | ||
751 | |||
752 | for (i = 0; i < NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX; i++) | ||
753 | nfs_pageio_mirror_init(&desc->pg_mirrors[i], bsize); | ||
754 | } else { | ||
755 | desc->pg_mirrors_dynamic = NULL; | ||
756 | desc->pg_mirrors = desc->pg_mirrors_static; | ||
757 | nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); | ||
758 | } | ||
705 | } | 759 | } |
706 | EXPORT_SYMBOL_GPL(nfs_pageio_init); | 760 | EXPORT_SYMBOL_GPL(nfs_pageio_init); |
707 | 761 | ||
@@ -737,14 +791,16 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata) | |||
737 | int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, | 791 | int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, |
738 | struct nfs_pgio_header *hdr) | 792 | struct nfs_pgio_header *hdr) |
739 | { | 793 | { |
794 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||
795 | |||
740 | struct nfs_page *req; | 796 | struct nfs_page *req; |
741 | struct page **pages, | 797 | struct page **pages, |
742 | *last_page; | 798 | *last_page; |
743 | struct list_head *head = &desc->pg_list; | 799 | struct list_head *head = &mirror->pg_list; |
744 | struct nfs_commit_info cinfo; | 800 | struct nfs_commit_info cinfo; |
745 | unsigned int pagecount, pageused; | 801 | unsigned int pagecount, pageused; |
746 | 802 | ||
747 | pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); | 803 | pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); |
748 | if (!nfs_pgarray_set(&hdr->page_array, pagecount)) | 804 | if (!nfs_pgarray_set(&hdr->page_array, pagecount)) |
749 | return nfs_pgio_error(desc, hdr); | 805 | return nfs_pgio_error(desc, hdr); |
750 | 806 | ||
@@ -772,7 +828,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, | |||
772 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | 828 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; |
773 | 829 | ||
774 | /* Set up the argument struct */ | 830 | /* Set up the argument struct */ |
775 | nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo); | 831 | nfs_pgio_rpcsetup(hdr, mirror->pg_count, 0, desc->pg_ioflags, &cinfo); |
776 | desc->pg_rpc_callops = &nfs_pgio_common_ops; | 832 | desc->pg_rpc_callops = &nfs_pgio_common_ops; |
777 | return 0; | 833 | return 0; |
778 | } | 834 | } |
@@ -780,23 +836,74 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); | |||
780 | 836 | ||
781 | static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) | 837 | static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) |
782 | { | 838 | { |
839 | struct nfs_pgio_mirror *mirror; | ||
783 | struct nfs_pgio_header *hdr; | 840 | struct nfs_pgio_header *hdr; |
784 | int ret; | 841 | int ret; |
785 | 842 | ||
843 | mirror = nfs_pgio_current_mirror(desc); | ||
844 | |||
786 | hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); | 845 | hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); |
787 | if (!hdr) { | 846 | if (!hdr) { |
788 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | 847 | /* TODO: make sure this is right with mirroring - or |
848 | * should it back out all mirrors? */ | ||
849 | desc->pg_completion_ops->error_cleanup(&mirror->pg_list); | ||
789 | return -ENOMEM; | 850 | return -ENOMEM; |
790 | } | 851 | } |
791 | nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); | 852 | nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); |
792 | ret = nfs_generic_pgio(desc, hdr); | 853 | ret = nfs_generic_pgio(desc, hdr); |
793 | if (ret == 0) | 854 | if (ret == 0) |
794 | ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), | 855 | ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), |
795 | hdr, desc->pg_rpc_callops, | 856 | hdr, |
857 | hdr->cred, | ||
858 | NFS_PROTO(hdr->inode), | ||
859 | desc->pg_rpc_callops, | ||
796 | desc->pg_ioflags, 0); | 860 | desc->pg_ioflags, 0); |
797 | return ret; | 861 | return ret; |
798 | } | 862 | } |
799 | 863 | ||
864 | /* | ||
865 | * nfs_pageio_setup_mirroring - determine if mirroring is to be used | ||
866 | * by calling the pg_get_mirror_count op | ||
867 | */ | ||
868 | static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, | ||
869 | struct nfs_page *req) | ||
870 | { | ||
871 | int mirror_count = 1; | ||
872 | |||
873 | if (!pgio->pg_ops->pg_get_mirror_count) | ||
874 | return 0; | ||
875 | |||
876 | mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); | ||
877 | |||
878 | if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) | ||
879 | return -EINVAL; | ||
880 | |||
881 | if (WARN_ON_ONCE(!pgio->pg_mirrors_dynamic)) | ||
882 | return -EINVAL; | ||
883 | |||
884 | pgio->pg_mirror_count = mirror_count; | ||
885 | |||
886 | return 0; | ||
887 | } | ||
888 | |||
889 | /* | ||
890 | * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) | ||
891 | */ | ||
892 | void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) | ||
893 | { | ||
894 | pgio->pg_mirror_count = 1; | ||
895 | pgio->pg_mirror_idx = 0; | ||
896 | } | ||
897 | |||
898 | static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) | ||
899 | { | ||
900 | pgio->pg_mirror_count = 1; | ||
901 | pgio->pg_mirror_idx = 0; | ||
902 | pgio->pg_mirrors = pgio->pg_mirrors_static; | ||
903 | kfree(pgio->pg_mirrors_dynamic); | ||
904 | pgio->pg_mirrors_dynamic = NULL; | ||
905 | } | ||
906 | |||
800 | static bool nfs_match_open_context(const struct nfs_open_context *ctx1, | 907 | static bool nfs_match_open_context(const struct nfs_open_context *ctx1, |
801 | const struct nfs_open_context *ctx2) | 908 | const struct nfs_open_context *ctx2) |
802 | { | 909 | { |
@@ -826,11 +933,15 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, | |||
826 | struct nfs_pageio_descriptor *pgio) | 933 | struct nfs_pageio_descriptor *pgio) |
827 | { | 934 | { |
828 | size_t size; | 935 | size_t size; |
936 | struct file_lock_context *flctx; | ||
829 | 937 | ||
830 | if (prev) { | 938 | if (prev) { |
831 | if (!nfs_match_open_context(req->wb_context, prev->wb_context)) | 939 | if (!nfs_match_open_context(req->wb_context, prev->wb_context)) |
832 | return false; | 940 | return false; |
833 | if (req->wb_context->dentry->d_inode->i_flock != NULL && | 941 | flctx = req->wb_context->dentry->d_inode->i_flctx; |
942 | if (flctx != NULL && | ||
943 | !(list_empty_careful(&flctx->flc_posix) && | ||
944 | list_empty_careful(&flctx->flc_flock)) && | ||
834 | !nfs_match_lock_context(req->wb_lock_context, | 945 | !nfs_match_lock_context(req->wb_lock_context, |
835 | prev->wb_lock_context)) | 946 | prev->wb_lock_context)) |
836 | return false; | 947 | return false; |
@@ -863,19 +974,22 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, | |||
863 | static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | 974 | static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, |
864 | struct nfs_page *req) | 975 | struct nfs_page *req) |
865 | { | 976 | { |
977 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||
978 | |||
866 | struct nfs_page *prev = NULL; | 979 | struct nfs_page *prev = NULL; |
867 | if (desc->pg_count != 0) { | 980 | |
868 | prev = nfs_list_entry(desc->pg_list.prev); | 981 | if (mirror->pg_count != 0) { |
982 | prev = nfs_list_entry(mirror->pg_list.prev); | ||
869 | } else { | 983 | } else { |
870 | if (desc->pg_ops->pg_init) | 984 | if (desc->pg_ops->pg_init) |
871 | desc->pg_ops->pg_init(desc, req); | 985 | desc->pg_ops->pg_init(desc, req); |
872 | desc->pg_base = req->wb_pgbase; | 986 | mirror->pg_base = req->wb_pgbase; |
873 | } | 987 | } |
874 | if (!nfs_can_coalesce_requests(prev, req, desc)) | 988 | if (!nfs_can_coalesce_requests(prev, req, desc)) |
875 | return 0; | 989 | return 0; |
876 | nfs_list_remove_request(req); | 990 | nfs_list_remove_request(req); |
877 | nfs_list_add_request(req, &desc->pg_list); | 991 | nfs_list_add_request(req, &mirror->pg_list); |
878 | desc->pg_count += req->wb_bytes; | 992 | mirror->pg_count += req->wb_bytes; |
879 | return 1; | 993 | return 1; |
880 | } | 994 | } |
881 | 995 | ||
@@ -884,16 +998,19 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | |||
884 | */ | 998 | */ |
885 | static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) | 999 | static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) |
886 | { | 1000 | { |
887 | if (!list_empty(&desc->pg_list)) { | 1001 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); |
1002 | |||
1003 | |||
1004 | if (!list_empty(&mirror->pg_list)) { | ||
888 | int error = desc->pg_ops->pg_doio(desc); | 1005 | int error = desc->pg_ops->pg_doio(desc); |
889 | if (error < 0) | 1006 | if (error < 0) |
890 | desc->pg_error = error; | 1007 | desc->pg_error = error; |
891 | else | 1008 | else |
892 | desc->pg_bytes_written += desc->pg_count; | 1009 | mirror->pg_bytes_written += mirror->pg_count; |
893 | } | 1010 | } |
894 | if (list_empty(&desc->pg_list)) { | 1011 | if (list_empty(&mirror->pg_list)) { |
895 | desc->pg_count = 0; | 1012 | mirror->pg_count = 0; |
896 | desc->pg_base = 0; | 1013 | mirror->pg_base = 0; |
897 | } | 1014 | } |
898 | } | 1015 | } |
899 | 1016 | ||
@@ -911,6 +1028,8 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) | |||
911 | static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | 1028 | static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, |
912 | struct nfs_page *req) | 1029 | struct nfs_page *req) |
913 | { | 1030 | { |
1031 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||
1032 | |||
914 | struct nfs_page *subreq; | 1033 | struct nfs_page *subreq; |
915 | unsigned int bytes_left = 0; | 1034 | unsigned int bytes_left = 0; |
916 | unsigned int offset, pgbase; | 1035 | unsigned int offset, pgbase; |
@@ -934,7 +1053,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | |||
934 | nfs_pageio_doio(desc); | 1053 | nfs_pageio_doio(desc); |
935 | if (desc->pg_error < 0) | 1054 | if (desc->pg_error < 0) |
936 | return 0; | 1055 | return 0; |
937 | if (desc->pg_recoalesce) | 1056 | if (mirror->pg_recoalesce) |
938 | return 0; | 1057 | return 0; |
939 | /* retry add_request for this subreq */ | 1058 | /* retry add_request for this subreq */ |
940 | nfs_page_group_lock(req, false); | 1059 | nfs_page_group_lock(req, false); |
@@ -972,14 +1091,16 @@ err_ptr: | |||
972 | 1091 | ||
973 | static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) | 1092 | static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) |
974 | { | 1093 | { |
1094 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||
975 | LIST_HEAD(head); | 1095 | LIST_HEAD(head); |
976 | 1096 | ||
977 | do { | 1097 | do { |
978 | list_splice_init(&desc->pg_list, &head); | 1098 | list_splice_init(&mirror->pg_list, &head); |
979 | desc->pg_bytes_written -= desc->pg_count; | 1099 | mirror->pg_bytes_written -= mirror->pg_count; |
980 | desc->pg_count = 0; | 1100 | mirror->pg_count = 0; |
981 | desc->pg_base = 0; | 1101 | mirror->pg_base = 0; |
982 | desc->pg_recoalesce = 0; | 1102 | mirror->pg_recoalesce = 0; |
1103 | |||
983 | desc->pg_moreio = 0; | 1104 | desc->pg_moreio = 0; |
984 | 1105 | ||
985 | while (!list_empty(&head)) { | 1106 | while (!list_empty(&head)) { |
@@ -993,11 +1114,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) | |||
993 | return 0; | 1114 | return 0; |
994 | break; | 1115 | break; |
995 | } | 1116 | } |
996 | } while (desc->pg_recoalesce); | 1117 | } while (mirror->pg_recoalesce); |
997 | return 1; | 1118 | return 1; |
998 | } | 1119 | } |
999 | 1120 | ||
1000 | int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | 1121 | static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, |
1001 | struct nfs_page *req) | 1122 | struct nfs_page *req) |
1002 | { | 1123 | { |
1003 | int ret; | 1124 | int ret; |
@@ -1010,9 +1131,80 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | |||
1010 | break; | 1131 | break; |
1011 | ret = nfs_do_recoalesce(desc); | 1132 | ret = nfs_do_recoalesce(desc); |
1012 | } while (ret); | 1133 | } while (ret); |
1134 | |||
1013 | return ret; | 1135 | return ret; |
1014 | } | 1136 | } |
1015 | 1137 | ||
1138 | int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | ||
1139 | struct nfs_page *req) | ||
1140 | { | ||
1141 | u32 midx; | ||
1142 | unsigned int pgbase, offset, bytes; | ||
1143 | struct nfs_page *dupreq, *lastreq; | ||
1144 | |||
1145 | pgbase = req->wb_pgbase; | ||
1146 | offset = req->wb_offset; | ||
1147 | bytes = req->wb_bytes; | ||
1148 | |||
1149 | nfs_pageio_setup_mirroring(desc, req); | ||
1150 | |||
1151 | for (midx = 0; midx < desc->pg_mirror_count; midx++) { | ||
1152 | if (midx) { | ||
1153 | nfs_page_group_lock(req, false); | ||
1154 | |||
1155 | /* find the last request */ | ||
1156 | for (lastreq = req->wb_head; | ||
1157 | lastreq->wb_this_page != req->wb_head; | ||
1158 | lastreq = lastreq->wb_this_page) | ||
1159 | ; | ||
1160 | |||
1161 | dupreq = nfs_create_request(req->wb_context, | ||
1162 | req->wb_page, lastreq, pgbase, bytes); | ||
1163 | |||
1164 | if (IS_ERR(dupreq)) { | ||
1165 | nfs_page_group_unlock(req); | ||
1166 | return 0; | ||
1167 | } | ||
1168 | |||
1169 | nfs_lock_request(dupreq); | ||
1170 | nfs_page_group_unlock(req); | ||
1171 | dupreq->wb_offset = offset; | ||
1172 | dupreq->wb_index = req->wb_index; | ||
1173 | } else | ||
1174 | dupreq = req; | ||
1175 | |||
1176 | if (nfs_pgio_has_mirroring(desc)) | ||
1177 | desc->pg_mirror_idx = midx; | ||
1178 | if (!nfs_pageio_add_request_mirror(desc, dupreq)) | ||
1179 | return 0; | ||
1180 | } | ||
1181 | |||
1182 | return 1; | ||
1183 | } | ||
1184 | |||
1185 | /* | ||
1186 | * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an | ||
1187 | * nfs_pageio_descriptor | ||
1188 | * @desc: pointer to io descriptor | ||
1189 | */ | ||
1190 | static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, | ||
1191 | u32 mirror_idx) | ||
1192 | { | ||
1193 | struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx]; | ||
1194 | u32 restore_idx = desc->pg_mirror_idx; | ||
1195 | |||
1196 | if (nfs_pgio_has_mirroring(desc)) | ||
1197 | desc->pg_mirror_idx = mirror_idx; | ||
1198 | for (;;) { | ||
1199 | nfs_pageio_doio(desc); | ||
1200 | if (!mirror->pg_recoalesce) | ||
1201 | break; | ||
1202 | if (!nfs_do_recoalesce(desc)) | ||
1203 | break; | ||
1204 | } | ||
1205 | desc->pg_mirror_idx = restore_idx; | ||
1206 | } | ||
1207 | |||
1016 | /* | 1208 | /* |
1017 | * nfs_pageio_resend - Transfer requests to new descriptor and resend | 1209 | * nfs_pageio_resend - Transfer requests to new descriptor and resend |
1018 | * @hdr - the pgio header to move request from | 1210 | * @hdr - the pgio header to move request from |
@@ -1046,18 +1238,19 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, | |||
1046 | EXPORT_SYMBOL_GPL(nfs_pageio_resend); | 1238 | EXPORT_SYMBOL_GPL(nfs_pageio_resend); |
1047 | 1239 | ||
1048 | /** | 1240 | /** |
1049 | * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor | 1241 | * nfs_pageio_complete - Complete I/O then cleanup an nfs_pageio_descriptor |
1050 | * @desc: pointer to io descriptor | 1242 | * @desc: pointer to io descriptor |
1051 | */ | 1243 | */ |
1052 | void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) | 1244 | void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) |
1053 | { | 1245 | { |
1054 | for (;;) { | 1246 | u32 midx; |
1055 | nfs_pageio_doio(desc); | 1247 | |
1056 | if (!desc->pg_recoalesce) | 1248 | for (midx = 0; midx < desc->pg_mirror_count; midx++) |
1057 | break; | 1249 | nfs_pageio_complete_mirror(desc, midx); |
1058 | if (!nfs_do_recoalesce(desc)) | 1250 | |
1059 | break; | 1251 | if (desc->pg_ops->pg_cleanup) |
1060 | } | 1252 | desc->pg_ops->pg_cleanup(desc); |
1253 | nfs_pageio_cleanup_mirroring(desc); | ||
1061 | } | 1254 | } |
1062 | 1255 | ||
1063 | /** | 1256 | /** |
@@ -1073,10 +1266,17 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) | |||
1073 | */ | 1266 | */ |
1074 | void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) | 1267 | void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) |
1075 | { | 1268 | { |
1076 | if (!list_empty(&desc->pg_list)) { | 1269 | struct nfs_pgio_mirror *mirror; |
1077 | struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); | 1270 | struct nfs_page *prev; |
1078 | if (index != prev->wb_index + 1) | 1271 | u32 midx; |
1079 | nfs_pageio_complete(desc); | 1272 | |
1273 | for (midx = 0; midx < desc->pg_mirror_count; midx++) { | ||
1274 | mirror = &desc->pg_mirrors[midx]; | ||
1275 | if (!list_empty(&mirror->pg_list)) { | ||
1276 | prev = nfs_list_entry(mirror->pg_list.prev); | ||
1277 | if (index != prev->wb_index + 1) | ||
1278 | nfs_pageio_complete_mirror(desc, midx); | ||
1279 | } | ||
1080 | } | 1280 | } |
1081 | } | 1281 | } |
1082 | 1282 | ||
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0a5dda4d85c2..4f802b02fbb9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include "pnfs.h" | 34 | #include "pnfs.h" |
35 | #include "iostat.h" | 35 | #include "iostat.h" |
36 | #include "nfs4trace.h" | 36 | #include "nfs4trace.h" |
37 | #include "delegation.h" | ||
37 | 38 | ||
38 | #define NFSDBG_FACILITY NFSDBG_PNFS | 39 | #define NFSDBG_FACILITY NFSDBG_PNFS |
39 | #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) | 40 | #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) |
@@ -50,6 +51,10 @@ static DEFINE_SPINLOCK(pnfs_spinlock); | |||
50 | */ | 51 | */ |
51 | static LIST_HEAD(pnfs_modules_tbl); | 52 | static LIST_HEAD(pnfs_modules_tbl); |
52 | 53 | ||
54 | static int | ||
55 | pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, | ||
56 | enum pnfs_iomode iomode, bool sync); | ||
57 | |||
53 | /* Return the registered pnfs layout driver module matching given id */ | 58 | /* Return the registered pnfs layout driver module matching given id */ |
54 | static struct pnfs_layoutdriver_type * | 59 | static struct pnfs_layoutdriver_type * |
55 | find_pnfs_driver_locked(u32 id) | 60 | find_pnfs_driver_locked(u32 id) |
@@ -238,6 +243,8 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) | |||
238 | struct inode *inode = lo->plh_inode; | 243 | struct inode *inode = lo->plh_inode; |
239 | 244 | ||
240 | if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { | 245 | if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { |
246 | if (!list_empty(&lo->plh_segs)) | ||
247 | WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); | ||
241 | pnfs_detach_layout_hdr(lo); | 248 | pnfs_detach_layout_hdr(lo); |
242 | spin_unlock(&inode->i_lock); | 249 | spin_unlock(&inode->i_lock); |
243 | pnfs_free_layout_hdr(lo); | 250 | pnfs_free_layout_hdr(lo); |
@@ -337,6 +344,48 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, | |||
337 | rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); | 344 | rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); |
338 | } | 345 | } |
339 | 346 | ||
347 | /* Return true if layoutreturn is needed */ | ||
348 | static bool | ||
349 | pnfs_layout_need_return(struct pnfs_layout_hdr *lo, | ||
350 | struct pnfs_layout_segment *lseg) | ||
351 | { | ||
352 | struct pnfs_layout_segment *s; | ||
353 | |||
354 | if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) | ||
355 | return false; | ||
356 | |||
357 | list_for_each_entry(s, &lo->plh_segs, pls_list) | ||
358 | if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags)) | ||
359 | return false; | ||
360 | |||
361 | return true; | ||
362 | } | ||
363 | |||
364 | static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, | ||
365 | struct pnfs_layout_hdr *lo, struct inode *inode) | ||
366 | { | ||
367 | lo = lseg->pls_layout; | ||
368 | inode = lo->plh_inode; | ||
369 | |||
370 | spin_lock(&inode->i_lock); | ||
371 | if (pnfs_layout_need_return(lo, lseg)) { | ||
372 | nfs4_stateid stateid; | ||
373 | enum pnfs_iomode iomode; | ||
374 | |||
375 | stateid = lo->plh_stateid; | ||
376 | iomode = lo->plh_return_iomode; | ||
377 | /* decreased in pnfs_send_layoutreturn() */ | ||
378 | lo->plh_block_lgets++; | ||
379 | lo->plh_return_iomode = 0; | ||
380 | spin_unlock(&inode->i_lock); | ||
381 | pnfs_get_layout_hdr(lo); | ||
382 | |||
383 | /* Send an async layoutreturn so we dont deadlock */ | ||
384 | pnfs_send_layoutreturn(lo, stateid, iomode, false); | ||
385 | } else | ||
386 | spin_unlock(&inode->i_lock); | ||
387 | } | ||
388 | |||
340 | void | 389 | void |
341 | pnfs_put_lseg(struct pnfs_layout_segment *lseg) | 390 | pnfs_put_lseg(struct pnfs_layout_segment *lseg) |
342 | { | 391 | { |
@@ -349,8 +398,17 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) | |||
349 | dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, | 398 | dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, |
350 | atomic_read(&lseg->pls_refcount), | 399 | atomic_read(&lseg->pls_refcount), |
351 | test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); | 400 | test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); |
401 | |||
402 | /* Handle the case where refcount != 1 */ | ||
403 | if (atomic_add_unless(&lseg->pls_refcount, -1, 1)) | ||
404 | return; | ||
405 | |||
352 | lo = lseg->pls_layout; | 406 | lo = lseg->pls_layout; |
353 | inode = lo->plh_inode; | 407 | inode = lo->plh_inode; |
408 | /* Do we need a layoutreturn? */ | ||
409 | if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) | ||
410 | pnfs_layoutreturn_before_put_lseg(lseg, lo, inode); | ||
411 | |||
354 | if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { | 412 | if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { |
355 | pnfs_get_layout_hdr(lo); | 413 | pnfs_get_layout_hdr(lo); |
356 | pnfs_layout_remove_lseg(lo, lseg); | 414 | pnfs_layout_remove_lseg(lo, lseg); |
@@ -543,6 +601,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) | |||
543 | pnfs_get_layout_hdr(lo); | 601 | pnfs_get_layout_hdr(lo); |
544 | pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); | 602 | pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); |
545 | pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); | 603 | pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); |
604 | pnfs_clear_retry_layoutget(lo); | ||
546 | spin_unlock(&nfsi->vfs_inode.i_lock); | 605 | spin_unlock(&nfsi->vfs_inode.i_lock); |
547 | pnfs_free_lseg_list(&tmp_list); | 606 | pnfs_free_lseg_list(&tmp_list); |
548 | pnfs_put_layout_hdr(lo); | 607 | pnfs_put_layout_hdr(lo); |
@@ -740,25 +799,37 @@ pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, | |||
740 | return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); | 799 | return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); |
741 | } | 800 | } |
742 | 801 | ||
802 | static bool | ||
803 | pnfs_layout_returning(const struct pnfs_layout_hdr *lo, | ||
804 | struct pnfs_layout_range *range) | ||
805 | { | ||
806 | return test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && | ||
807 | (lo->plh_return_iomode == IOMODE_ANY || | ||
808 | lo->plh_return_iomode == range->iomode); | ||
809 | } | ||
810 | |||
743 | /* lget is set to 1 if called from inside send_layoutget call chain */ | 811 | /* lget is set to 1 if called from inside send_layoutget call chain */ |
744 | static bool | 812 | static bool |
745 | pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, int lget) | 813 | pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, |
814 | struct pnfs_layout_range *range, int lget) | ||
746 | { | 815 | { |
747 | return lo->plh_block_lgets || | 816 | return lo->plh_block_lgets || |
748 | test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || | 817 | test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || |
749 | (list_empty(&lo->plh_segs) && | 818 | (list_empty(&lo->plh_segs) && |
750 | (atomic_read(&lo->plh_outstanding) > lget)); | 819 | (atomic_read(&lo->plh_outstanding) > lget)) || |
820 | pnfs_layout_returning(lo, range); | ||
751 | } | 821 | } |
752 | 822 | ||
753 | int | 823 | int |
754 | pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | 824 | pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, |
825 | struct pnfs_layout_range *range, | ||
755 | struct nfs4_state *open_state) | 826 | struct nfs4_state *open_state) |
756 | { | 827 | { |
757 | int status = 0; | 828 | int status = 0; |
758 | 829 | ||
759 | dprintk("--> %s\n", __func__); | 830 | dprintk("--> %s\n", __func__); |
760 | spin_lock(&lo->plh_inode->i_lock); | 831 | spin_lock(&lo->plh_inode->i_lock); |
761 | if (pnfs_layoutgets_blocked(lo, 1)) { | 832 | if (pnfs_layoutgets_blocked(lo, range, 1)) { |
762 | status = -EAGAIN; | 833 | status = -EAGAIN; |
763 | } else if (!nfs4_valid_open_stateid(open_state)) { | 834 | } else if (!nfs4_valid_open_stateid(open_state)) { |
764 | status = -EBADF; | 835 | status = -EBADF; |
@@ -825,7 +896,9 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
825 | pnfs_layout_io_set_failed(lo, range->iomode); | 896 | pnfs_layout_io_set_failed(lo, range->iomode); |
826 | } | 897 | } |
827 | return NULL; | 898 | return NULL; |
828 | } | 899 | } else |
900 | pnfs_layout_clear_fail_bit(lo, | ||
901 | pnfs_iomode_to_fail_bit(range->iomode)); | ||
829 | 902 | ||
830 | return lseg; | 903 | return lseg; |
831 | } | 904 | } |
@@ -845,6 +918,49 @@ static void pnfs_clear_layoutcommit(struct inode *inode, | |||
845 | } | 918 | } |
846 | } | 919 | } |
847 | 920 | ||
921 | void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) | ||
922 | { | ||
923 | clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); | ||
924 | smp_mb__after_atomic(); | ||
925 | wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); | ||
926 | } | ||
927 | |||
928 | static int | ||
929 | pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, | ||
930 | enum pnfs_iomode iomode, bool sync) | ||
931 | { | ||
932 | struct inode *ino = lo->plh_inode; | ||
933 | struct nfs4_layoutreturn *lrp; | ||
934 | int status = 0; | ||
935 | |||
936 | lrp = kzalloc(sizeof(*lrp), GFP_NOFS); | ||
937 | if (unlikely(lrp == NULL)) { | ||
938 | status = -ENOMEM; | ||
939 | spin_lock(&ino->i_lock); | ||
940 | lo->plh_block_lgets--; | ||
941 | pnfs_clear_layoutreturn_waitbit(lo); | ||
942 | rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq); | ||
943 | spin_unlock(&ino->i_lock); | ||
944 | pnfs_put_layout_hdr(lo); | ||
945 | goto out; | ||
946 | } | ||
947 | |||
948 | lrp->args.stateid = stateid; | ||
949 | lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; | ||
950 | lrp->args.inode = ino; | ||
951 | lrp->args.range.iomode = iomode; | ||
952 | lrp->args.range.offset = 0; | ||
953 | lrp->args.range.length = NFS4_MAX_UINT64; | ||
954 | lrp->args.layout = lo; | ||
955 | lrp->clp = NFS_SERVER(ino)->nfs_client; | ||
956 | lrp->cred = lo->plh_lc_cred; | ||
957 | |||
958 | status = nfs4_proc_layoutreturn(lrp, sync); | ||
959 | out: | ||
960 | dprintk("<-- %s status: %d\n", __func__, status); | ||
961 | return status; | ||
962 | } | ||
963 | |||
848 | /* | 964 | /* |
849 | * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr | 965 | * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr |
850 | * when the layout segment list is empty. | 966 | * when the layout segment list is empty. |
@@ -859,7 +975,6 @@ _pnfs_return_layout(struct inode *ino) | |||
859 | struct pnfs_layout_hdr *lo = NULL; | 975 | struct pnfs_layout_hdr *lo = NULL; |
860 | struct nfs_inode *nfsi = NFS_I(ino); | 976 | struct nfs_inode *nfsi = NFS_I(ino); |
861 | LIST_HEAD(tmp_list); | 977 | LIST_HEAD(tmp_list); |
862 | struct nfs4_layoutreturn *lrp; | ||
863 | nfs4_stateid stateid; | 978 | nfs4_stateid stateid; |
864 | int status = 0, empty; | 979 | int status = 0, empty; |
865 | 980 | ||
@@ -901,24 +1016,7 @@ _pnfs_return_layout(struct inode *ino) | |||
901 | spin_unlock(&ino->i_lock); | 1016 | spin_unlock(&ino->i_lock); |
902 | pnfs_free_lseg_list(&tmp_list); | 1017 | pnfs_free_lseg_list(&tmp_list); |
903 | 1018 | ||
904 | lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); | 1019 | status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); |
905 | if (unlikely(lrp == NULL)) { | ||
906 | status = -ENOMEM; | ||
907 | spin_lock(&ino->i_lock); | ||
908 | lo->plh_block_lgets--; | ||
909 | spin_unlock(&ino->i_lock); | ||
910 | pnfs_put_layout_hdr(lo); | ||
911 | goto out; | ||
912 | } | ||
913 | |||
914 | lrp->args.stateid = stateid; | ||
915 | lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; | ||
916 | lrp->args.inode = ino; | ||
917 | lrp->args.layout = lo; | ||
918 | lrp->clp = NFS_SERVER(ino)->nfs_client; | ||
919 | lrp->cred = lo->plh_lc_cred; | ||
920 | |||
921 | status = nfs4_proc_layoutreturn(lrp); | ||
922 | out: | 1020 | out: |
923 | dprintk("<-- %s status: %d\n", __func__, status); | 1021 | dprintk("<-- %s status: %d\n", __func__, status); |
924 | return status; | 1022 | return status; |
@@ -954,31 +1052,60 @@ pnfs_commit_and_return_layout(struct inode *inode) | |||
954 | 1052 | ||
955 | bool pnfs_roc(struct inode *ino) | 1053 | bool pnfs_roc(struct inode *ino) |
956 | { | 1054 | { |
1055 | struct nfs_inode *nfsi = NFS_I(ino); | ||
1056 | struct nfs_open_context *ctx; | ||
1057 | struct nfs4_state *state; | ||
957 | struct pnfs_layout_hdr *lo; | 1058 | struct pnfs_layout_hdr *lo; |
958 | struct pnfs_layout_segment *lseg, *tmp; | 1059 | struct pnfs_layout_segment *lseg, *tmp; |
1060 | nfs4_stateid stateid; | ||
959 | LIST_HEAD(tmp_list); | 1061 | LIST_HEAD(tmp_list); |
960 | bool found = false; | 1062 | bool found = false, layoutreturn = false; |
961 | 1063 | ||
962 | spin_lock(&ino->i_lock); | 1064 | spin_lock(&ino->i_lock); |
963 | lo = NFS_I(ino)->layout; | 1065 | lo = nfsi->layout; |
964 | if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) || | 1066 | if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) || |
965 | test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) | 1067 | test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) |
966 | goto out_nolayout; | 1068 | goto out_noroc; |
1069 | |||
1070 | /* Don't return layout if we hold a delegation */ | ||
1071 | if (nfs4_check_delegation(ino, FMODE_READ)) | ||
1072 | goto out_noroc; | ||
1073 | |||
1074 | list_for_each_entry(ctx, &nfsi->open_files, list) { | ||
1075 | state = ctx->state; | ||
1076 | /* Don't return layout if there is open file state */ | ||
1077 | if (state != NULL && state->state != 0) | ||
1078 | goto out_noroc; | ||
1079 | } | ||
1080 | |||
1081 | pnfs_clear_retry_layoutget(lo); | ||
967 | list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) | 1082 | list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) |
968 | if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { | 1083 | if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { |
969 | mark_lseg_invalid(lseg, &tmp_list); | 1084 | mark_lseg_invalid(lseg, &tmp_list); |
970 | found = true; | 1085 | found = true; |
971 | } | 1086 | } |
972 | if (!found) | 1087 | if (!found) |
973 | goto out_nolayout; | 1088 | goto out_noroc; |
974 | lo->plh_block_lgets++; | 1089 | lo->plh_block_lgets++; |
975 | pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */ | 1090 | pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */ |
976 | spin_unlock(&ino->i_lock); | 1091 | spin_unlock(&ino->i_lock); |
977 | pnfs_free_lseg_list(&tmp_list); | 1092 | pnfs_free_lseg_list(&tmp_list); |
978 | return true; | 1093 | return true; |
979 | 1094 | ||
980 | out_nolayout: | 1095 | out_noroc: |
1096 | if (lo) { | ||
1097 | stateid = lo->plh_stateid; | ||
1098 | layoutreturn = | ||
1099 | test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | ||
1100 | &lo->plh_flags); | ||
1101 | if (layoutreturn) { | ||
1102 | lo->plh_block_lgets++; | ||
1103 | pnfs_get_layout_hdr(lo); | ||
1104 | } | ||
1105 | } | ||
981 | spin_unlock(&ino->i_lock); | 1106 | spin_unlock(&ino->i_lock); |
1107 | if (layoutreturn) | ||
1108 | pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); | ||
982 | return false; | 1109 | return false; |
983 | } | 1110 | } |
984 | 1111 | ||
@@ -1013,8 +1140,9 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) | |||
1013 | struct nfs_inode *nfsi = NFS_I(ino); | 1140 | struct nfs_inode *nfsi = NFS_I(ino); |
1014 | struct pnfs_layout_hdr *lo; | 1141 | struct pnfs_layout_hdr *lo; |
1015 | struct pnfs_layout_segment *lseg; | 1142 | struct pnfs_layout_segment *lseg; |
1143 | nfs4_stateid stateid; | ||
1016 | u32 current_seqid; | 1144 | u32 current_seqid; |
1017 | bool found = false; | 1145 | bool found = false, layoutreturn = false; |
1018 | 1146 | ||
1019 | spin_lock(&ino->i_lock); | 1147 | spin_lock(&ino->i_lock); |
1020 | list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) | 1148 | list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) |
@@ -1031,7 +1159,21 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) | |||
1031 | */ | 1159 | */ |
1032 | *barrier = current_seqid + atomic_read(&lo->plh_outstanding); | 1160 | *barrier = current_seqid + atomic_read(&lo->plh_outstanding); |
1033 | out: | 1161 | out: |
1162 | if (!found) { | ||
1163 | stateid = lo->plh_stateid; | ||
1164 | layoutreturn = | ||
1165 | test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | ||
1166 | &lo->plh_flags); | ||
1167 | if (layoutreturn) { | ||
1168 | lo->plh_block_lgets++; | ||
1169 | pnfs_get_layout_hdr(lo); | ||
1170 | } | ||
1171 | } | ||
1034 | spin_unlock(&ino->i_lock); | 1172 | spin_unlock(&ino->i_lock); |
1173 | if (layoutreturn) { | ||
1174 | rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); | ||
1175 | pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false); | ||
1176 | } | ||
1035 | return found; | 1177 | return found; |
1036 | } | 1178 | } |
1037 | 1179 | ||
@@ -1178,6 +1320,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, | |||
1178 | 1320 | ||
1179 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { | 1321 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { |
1180 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && | 1322 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && |
1323 | !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && | ||
1181 | pnfs_lseg_range_match(&lseg->pls_range, range)) { | 1324 | pnfs_lseg_range_match(&lseg->pls_range, range)) { |
1182 | ret = pnfs_get_lseg(lseg); | 1325 | ret = pnfs_get_lseg(lseg); |
1183 | break; | 1326 | break; |
@@ -1266,6 +1409,35 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, | |||
1266 | return ret; | 1409 | return ret; |
1267 | } | 1410 | } |
1268 | 1411 | ||
1412 | /* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */ | ||
1413 | static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key) | ||
1414 | { | ||
1415 | if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags)) | ||
1416 | return 1; | ||
1417 | return nfs_wait_bit_killable(key); | ||
1418 | } | ||
1419 | |||
1420 | static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) | ||
1421 | { | ||
1422 | /* | ||
1423 | * send layoutcommit as it can hold up layoutreturn due to lseg | ||
1424 | * reference | ||
1425 | */ | ||
1426 | pnfs_layoutcommit_inode(lo->plh_inode, false); | ||
1427 | return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, | ||
1428 | pnfs_layoutget_retry_bit_wait, | ||
1429 | TASK_UNINTERRUPTIBLE); | ||
1430 | } | ||
1431 | |||
1432 | static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) | ||
1433 | { | ||
1434 | unsigned long *bitlock = &lo->plh_flags; | ||
1435 | |||
1436 | clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); | ||
1437 | smp_mb__after_atomic(); | ||
1438 | wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); | ||
1439 | } | ||
1440 | |||
1269 | /* | 1441 | /* |
1270 | * Layout segment is retreived from the server if not cached. | 1442 | * Layout segment is retreived from the server if not cached. |
1271 | * The appropriate layout segment is referenced and returned to the caller. | 1443 | * The appropriate layout segment is referenced and returned to the caller. |
@@ -1296,6 +1468,8 @@ pnfs_update_layout(struct inode *ino, | |||
1296 | if (pnfs_within_mdsthreshold(ctx, ino, iomode)) | 1468 | if (pnfs_within_mdsthreshold(ctx, ino, iomode)) |
1297 | goto out; | 1469 | goto out; |
1298 | 1470 | ||
1471 | lookup_again: | ||
1472 | first = false; | ||
1299 | spin_lock(&ino->i_lock); | 1473 | spin_lock(&ino->i_lock); |
1300 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); | 1474 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); |
1301 | if (lo == NULL) { | 1475 | if (lo == NULL) { |
@@ -1310,27 +1484,62 @@ pnfs_update_layout(struct inode *ino, | |||
1310 | } | 1484 | } |
1311 | 1485 | ||
1312 | /* if LAYOUTGET already failed once we don't try again */ | 1486 | /* if LAYOUTGET already failed once we don't try again */ |
1313 | if (pnfs_layout_io_test_failed(lo, iomode)) | 1487 | if (pnfs_layout_io_test_failed(lo, iomode) && |
1488 | !pnfs_should_retry_layoutget(lo)) | ||
1314 | goto out_unlock; | 1489 | goto out_unlock; |
1315 | 1490 | ||
1316 | /* Check to see if the layout for the given range already exists */ | 1491 | first = list_empty(&lo->plh_segs); |
1317 | lseg = pnfs_find_lseg(lo, &arg); | 1492 | if (first) { |
1318 | if (lseg) | 1493 | /* The first layoutget for the file. Need to serialize per |
1319 | goto out_unlock; | 1494 | * RFC 5661 Errata 3208. |
1495 | */ | ||
1496 | if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, | ||
1497 | &lo->plh_flags)) { | ||
1498 | spin_unlock(&ino->i_lock); | ||
1499 | wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET, | ||
1500 | TASK_UNINTERRUPTIBLE); | ||
1501 | pnfs_put_layout_hdr(lo); | ||
1502 | goto lookup_again; | ||
1503 | } | ||
1504 | } else { | ||
1505 | /* Check to see if the layout for the given range | ||
1506 | * already exists | ||
1507 | */ | ||
1508 | lseg = pnfs_find_lseg(lo, &arg); | ||
1509 | if (lseg) | ||
1510 | goto out_unlock; | ||
1511 | } | ||
1512 | |||
1513 | /* | ||
1514 | * Because we free lsegs before sending LAYOUTRETURN, we need to wait | ||
1515 | * for LAYOUTRETURN even if first is true. | ||
1516 | */ | ||
1517 | if (!lseg && pnfs_should_retry_layoutget(lo) && | ||
1518 | test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { | ||
1519 | spin_unlock(&ino->i_lock); | ||
1520 | dprintk("%s wait for layoutreturn\n", __func__); | ||
1521 | if (pnfs_prepare_to_retry_layoutget(lo)) { | ||
1522 | if (first) | ||
1523 | pnfs_clear_first_layoutget(lo); | ||
1524 | pnfs_put_layout_hdr(lo); | ||
1525 | dprintk("%s retrying\n", __func__); | ||
1526 | goto lookup_again; | ||
1527 | } | ||
1528 | goto out_put_layout_hdr; | ||
1529 | } | ||
1320 | 1530 | ||
1321 | if (pnfs_layoutgets_blocked(lo, 0)) | 1531 | if (pnfs_layoutgets_blocked(lo, &arg, 0)) |
1322 | goto out_unlock; | 1532 | goto out_unlock; |
1323 | atomic_inc(&lo->plh_outstanding); | 1533 | atomic_inc(&lo->plh_outstanding); |
1324 | |||
1325 | first = list_empty(&lo->plh_layouts) ? true : false; | ||
1326 | spin_unlock(&ino->i_lock); | 1534 | spin_unlock(&ino->i_lock); |
1327 | 1535 | ||
1328 | if (first) { | 1536 | if (list_empty(&lo->plh_layouts)) { |
1329 | /* The lo must be on the clp list if there is any | 1537 | /* The lo must be on the clp list if there is any |
1330 | * chance of a CB_LAYOUTRECALL(FILE) coming in. | 1538 | * chance of a CB_LAYOUTRECALL(FILE) coming in. |
1331 | */ | 1539 | */ |
1332 | spin_lock(&clp->cl_lock); | 1540 | spin_lock(&clp->cl_lock); |
1333 | list_add_tail(&lo->plh_layouts, &server->layouts); | 1541 | if (list_empty(&lo->plh_layouts)) |
1542 | list_add_tail(&lo->plh_layouts, &server->layouts); | ||
1334 | spin_unlock(&clp->cl_lock); | 1543 | spin_unlock(&clp->cl_lock); |
1335 | } | 1544 | } |
1336 | 1545 | ||
@@ -1343,8 +1552,11 @@ pnfs_update_layout(struct inode *ino, | |||
1343 | arg.length = PAGE_CACHE_ALIGN(arg.length); | 1552 | arg.length = PAGE_CACHE_ALIGN(arg.length); |
1344 | 1553 | ||
1345 | lseg = send_layoutget(lo, ctx, &arg, gfp_flags); | 1554 | lseg = send_layoutget(lo, ctx, &arg, gfp_flags); |
1555 | pnfs_clear_retry_layoutget(lo); | ||
1346 | atomic_dec(&lo->plh_outstanding); | 1556 | atomic_dec(&lo->plh_outstanding); |
1347 | out_put_layout_hdr: | 1557 | out_put_layout_hdr: |
1558 | if (first) | ||
1559 | pnfs_clear_first_layoutget(lo); | ||
1348 | pnfs_put_layout_hdr(lo); | 1560 | pnfs_put_layout_hdr(lo); |
1349 | out: | 1561 | out: |
1350 | dprintk("%s: inode %s/%llu pNFS layout segment %s for " | 1562 | dprintk("%s: inode %s/%llu pNFS layout segment %s for " |
@@ -1393,7 +1605,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
1393 | goto out_forget_reply; | 1605 | goto out_forget_reply; |
1394 | } | 1606 | } |
1395 | 1607 | ||
1396 | if (pnfs_layoutgets_blocked(lo, 1)) { | 1608 | if (pnfs_layoutgets_blocked(lo, &lgp->args.range, 1)) { |
1397 | dprintk("%s forget reply due to state\n", __func__); | 1609 | dprintk("%s forget reply due to state\n", __func__); |
1398 | goto out_forget_reply; | 1610 | goto out_forget_reply; |
1399 | } | 1611 | } |
@@ -1440,24 +1652,79 @@ out_forget_reply: | |||
1440 | goto out; | 1652 | goto out; |
1441 | } | 1653 | } |
1442 | 1654 | ||
1655 | static void | ||
1656 | pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, | ||
1657 | struct list_head *tmp_list, | ||
1658 | struct pnfs_layout_range *return_range) | ||
1659 | { | ||
1660 | struct pnfs_layout_segment *lseg, *next; | ||
1661 | |||
1662 | dprintk("%s:Begin lo %p\n", __func__, lo); | ||
1663 | |||
1664 | if (list_empty(&lo->plh_segs)) | ||
1665 | return; | ||
1666 | |||
1667 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) | ||
1668 | if (should_free_lseg(&lseg->pls_range, return_range)) { | ||
1669 | dprintk("%s: marking lseg %p iomode %d " | ||
1670 | "offset %llu length %llu\n", __func__, | ||
1671 | lseg, lseg->pls_range.iomode, | ||
1672 | lseg->pls_range.offset, | ||
1673 | lseg->pls_range.length); | ||
1674 | set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); | ||
1675 | mark_lseg_invalid(lseg, tmp_list); | ||
1676 | } | ||
1677 | } | ||
1678 | |||
1679 | void pnfs_error_mark_layout_for_return(struct inode *inode, | ||
1680 | struct pnfs_layout_segment *lseg) | ||
1681 | { | ||
1682 | struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; | ||
1683 | int iomode = pnfs_iomode_to_fail_bit(lseg->pls_range.iomode); | ||
1684 | struct pnfs_layout_range range = { | ||
1685 | .iomode = lseg->pls_range.iomode, | ||
1686 | .offset = 0, | ||
1687 | .length = NFS4_MAX_UINT64, | ||
1688 | }; | ||
1689 | LIST_HEAD(free_me); | ||
1690 | |||
1691 | spin_lock(&inode->i_lock); | ||
1692 | /* set failure bit so that pnfs path will be retried later */ | ||
1693 | pnfs_layout_set_fail_bit(lo, iomode); | ||
1694 | set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); | ||
1695 | if (lo->plh_return_iomode == 0) | ||
1696 | lo->plh_return_iomode = range.iomode; | ||
1697 | else if (lo->plh_return_iomode != range.iomode) | ||
1698 | lo->plh_return_iomode = IOMODE_ANY; | ||
1699 | /* | ||
1700 | * mark all matching lsegs so that we are sure to have no live | ||
1701 | * segments at hand when sending layoutreturn. See pnfs_put_lseg() | ||
1702 | * for how it works. | ||
1703 | */ | ||
1704 | pnfs_mark_matching_lsegs_return(lo, &free_me, &range); | ||
1705 | spin_unlock(&inode->i_lock); | ||
1706 | pnfs_free_lseg_list(&free_me); | ||
1707 | } | ||
1708 | EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); | ||
1709 | |||
1443 | void | 1710 | void |
1444 | pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 1711 | pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
1445 | { | 1712 | { |
1446 | u64 rd_size = req->wb_bytes; | 1713 | u64 rd_size = req->wb_bytes; |
1447 | 1714 | ||
1448 | WARN_ON_ONCE(pgio->pg_lseg != NULL); | 1715 | if (pgio->pg_lseg == NULL) { |
1449 | 1716 | if (pgio->pg_dreq == NULL) | |
1450 | if (pgio->pg_dreq == NULL) | 1717 | rd_size = i_size_read(pgio->pg_inode) - req_offset(req); |
1451 | rd_size = i_size_read(pgio->pg_inode) - req_offset(req); | 1718 | else |
1452 | else | 1719 | rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); |
1453 | rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); | 1720 | |
1454 | 1721 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | |
1455 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 1722 | req->wb_context, |
1456 | req->wb_context, | 1723 | req_offset(req), |
1457 | req_offset(req), | 1724 | rd_size, |
1458 | rd_size, | 1725 | IOMODE_READ, |
1459 | IOMODE_READ, | 1726 | GFP_KERNEL); |
1460 | GFP_KERNEL); | 1727 | } |
1461 | /* If no lseg, fall back to read through mds */ | 1728 | /* If no lseg, fall back to read through mds */ |
1462 | if (pgio->pg_lseg == NULL) | 1729 | if (pgio->pg_lseg == NULL) |
1463 | nfs_pageio_reset_read_mds(pgio); | 1730 | nfs_pageio_reset_read_mds(pgio); |
@@ -1469,27 +1736,36 @@ void | |||
1469 | pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | 1736 | pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, |
1470 | struct nfs_page *req, u64 wb_size) | 1737 | struct nfs_page *req, u64 wb_size) |
1471 | { | 1738 | { |
1472 | WARN_ON_ONCE(pgio->pg_lseg != NULL); | 1739 | if (pgio->pg_lseg == NULL) |
1473 | 1740 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | |
1474 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 1741 | req->wb_context, |
1475 | req->wb_context, | 1742 | req_offset(req), |
1476 | req_offset(req), | 1743 | wb_size, |
1477 | wb_size, | 1744 | IOMODE_RW, |
1478 | IOMODE_RW, | 1745 | GFP_NOFS); |
1479 | GFP_NOFS); | ||
1480 | /* If no lseg, fall back to write through mds */ | 1746 | /* If no lseg, fall back to write through mds */ |
1481 | if (pgio->pg_lseg == NULL) | 1747 | if (pgio->pg_lseg == NULL) |
1482 | nfs_pageio_reset_write_mds(pgio); | 1748 | nfs_pageio_reset_write_mds(pgio); |
1483 | } | 1749 | } |
1484 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); | 1750 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); |
1485 | 1751 | ||
1752 | void | ||
1753 | pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc) | ||
1754 | { | ||
1755 | if (desc->pg_lseg) { | ||
1756 | pnfs_put_lseg(desc->pg_lseg); | ||
1757 | desc->pg_lseg = NULL; | ||
1758 | } | ||
1759 | } | ||
1760 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); | ||
1761 | |||
1486 | /* | 1762 | /* |
1487 | * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number | 1763 | * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number |
1488 | * of bytes (maximum @req->wb_bytes) that can be coalesced. | 1764 | * of bytes (maximum @req->wb_bytes) that can be coalesced. |
1489 | */ | 1765 | */ |
1490 | size_t | 1766 | size_t |
1491 | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 1767 | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, |
1492 | struct nfs_page *req) | 1768 | struct nfs_page *prev, struct nfs_page *req) |
1493 | { | 1769 | { |
1494 | unsigned int size; | 1770 | unsigned int size; |
1495 | u64 seg_end, req_start, seg_left; | 1771 | u64 seg_end, req_start, seg_left; |
@@ -1513,10 +1789,16 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | |||
1513 | seg_end = end_offset(pgio->pg_lseg->pls_range.offset, | 1789 | seg_end = end_offset(pgio->pg_lseg->pls_range.offset, |
1514 | pgio->pg_lseg->pls_range.length); | 1790 | pgio->pg_lseg->pls_range.length); |
1515 | req_start = req_offset(req); | 1791 | req_start = req_offset(req); |
1516 | WARN_ON_ONCE(req_start > seg_end); | 1792 | WARN_ON_ONCE(req_start >= seg_end); |
1517 | /* start of request is past the last byte of this segment */ | 1793 | /* start of request is past the last byte of this segment */ |
1518 | if (req_start >= seg_end) | 1794 | if (req_start >= seg_end) { |
1795 | /* reference the new lseg */ | ||
1796 | if (pgio->pg_ops->pg_cleanup) | ||
1797 | pgio->pg_ops->pg_cleanup(pgio); | ||
1798 | if (pgio->pg_ops->pg_init) | ||
1799 | pgio->pg_ops->pg_init(pgio, req); | ||
1519 | return 0; | 1800 | return 0; |
1801 | } | ||
1520 | 1802 | ||
1521 | /* adjust 'size' iff there are fewer bytes left in the | 1803 | /* adjust 'size' iff there are fewer bytes left in the |
1522 | * segment than what nfs_generic_pg_test returned */ | 1804 | * segment than what nfs_generic_pg_test returned */ |
@@ -1571,10 +1853,12 @@ static void | |||
1571 | pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | 1853 | pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, |
1572 | struct nfs_pgio_header *hdr) | 1854 | struct nfs_pgio_header *hdr) |
1573 | { | 1855 | { |
1856 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||
1857 | |||
1574 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { | 1858 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { |
1575 | list_splice_tail_init(&hdr->pages, &desc->pg_list); | 1859 | list_splice_tail_init(&hdr->pages, &mirror->pg_list); |
1576 | nfs_pageio_reset_write_mds(desc); | 1860 | nfs_pageio_reset_write_mds(desc); |
1577 | desc->pg_recoalesce = 1; | 1861 | mirror->pg_recoalesce = 1; |
1578 | } | 1862 | } |
1579 | nfs_pgio_data_destroy(hdr); | 1863 | nfs_pgio_data_destroy(hdr); |
1580 | } | 1864 | } |
@@ -1608,11 +1892,9 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc, | |||
1608 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 1892 | struct pnfs_layout_segment *lseg = desc->pg_lseg; |
1609 | enum pnfs_try_status trypnfs; | 1893 | enum pnfs_try_status trypnfs; |
1610 | 1894 | ||
1611 | desc->pg_lseg = NULL; | ||
1612 | trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); | 1895 | trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); |
1613 | if (trypnfs == PNFS_NOT_ATTEMPTED) | 1896 | if (trypnfs == PNFS_NOT_ATTEMPTED) |
1614 | pnfs_write_through_mds(desc, hdr); | 1897 | pnfs_write_through_mds(desc, hdr); |
1615 | pnfs_put_lseg(lseg); | ||
1616 | } | 1898 | } |
1617 | 1899 | ||
1618 | static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) | 1900 | static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) |
@@ -1625,24 +1907,23 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); | |||
1625 | int | 1907 | int |
1626 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | 1908 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) |
1627 | { | 1909 | { |
1910 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||
1911 | |||
1628 | struct nfs_pgio_header *hdr; | 1912 | struct nfs_pgio_header *hdr; |
1629 | int ret; | 1913 | int ret; |
1630 | 1914 | ||
1631 | hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); | 1915 | hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); |
1632 | if (!hdr) { | 1916 | if (!hdr) { |
1633 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | 1917 | desc->pg_completion_ops->error_cleanup(&mirror->pg_list); |
1634 | pnfs_put_lseg(desc->pg_lseg); | ||
1635 | desc->pg_lseg = NULL; | ||
1636 | return -ENOMEM; | 1918 | return -ENOMEM; |
1637 | } | 1919 | } |
1638 | nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); | 1920 | nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); |
1921 | |||
1639 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); | 1922 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); |
1640 | ret = nfs_generic_pgio(desc, hdr); | 1923 | ret = nfs_generic_pgio(desc, hdr); |
1641 | if (ret != 0) { | 1924 | if (!ret) |
1642 | pnfs_put_lseg(desc->pg_lseg); | ||
1643 | desc->pg_lseg = NULL; | ||
1644 | } else | ||
1645 | pnfs_do_write(desc, hdr, desc->pg_ioflags); | 1925 | pnfs_do_write(desc, hdr, desc->pg_ioflags); |
1926 | |||
1646 | return ret; | 1927 | return ret; |
1647 | } | 1928 | } |
1648 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); | 1929 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); |
@@ -1687,10 +1968,12 @@ static void | |||
1687 | pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, | 1968 | pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, |
1688 | struct nfs_pgio_header *hdr) | 1969 | struct nfs_pgio_header *hdr) |
1689 | { | 1970 | { |
1971 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||
1972 | |||
1690 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { | 1973 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { |
1691 | list_splice_tail_init(&hdr->pages, &desc->pg_list); | 1974 | list_splice_tail_init(&hdr->pages, &mirror->pg_list); |
1692 | nfs_pageio_reset_read_mds(desc); | 1975 | nfs_pageio_reset_read_mds(desc); |
1693 | desc->pg_recoalesce = 1; | 1976 | mirror->pg_recoalesce = 1; |
1694 | } | 1977 | } |
1695 | nfs_pgio_data_destroy(hdr); | 1978 | nfs_pgio_data_destroy(hdr); |
1696 | } | 1979 | } |
@@ -1719,18 +2002,29 @@ pnfs_try_to_read_data(struct nfs_pgio_header *hdr, | |||
1719 | return trypnfs; | 2002 | return trypnfs; |
1720 | } | 2003 | } |
1721 | 2004 | ||
2005 | /* Resend all requests through pnfs. */ | ||
2006 | int pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) | ||
2007 | { | ||
2008 | struct nfs_pageio_descriptor pgio; | ||
2009 | |||
2010 | nfs_pageio_init_read(&pgio, hdr->inode, false, hdr->completion_ops); | ||
2011 | return nfs_pageio_resend(&pgio, hdr); | ||
2012 | } | ||
2013 | EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs); | ||
2014 | |||
1722 | static void | 2015 | static void |
1723 | pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) | 2016 | pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) |
1724 | { | 2017 | { |
1725 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | 2018 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; |
1726 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 2019 | struct pnfs_layout_segment *lseg = desc->pg_lseg; |
1727 | enum pnfs_try_status trypnfs; | 2020 | enum pnfs_try_status trypnfs; |
2021 | int err = 0; | ||
1728 | 2022 | ||
1729 | desc->pg_lseg = NULL; | ||
1730 | trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); | 2023 | trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); |
1731 | if (trypnfs == PNFS_NOT_ATTEMPTED) | 2024 | if (trypnfs == PNFS_TRY_AGAIN) |
2025 | err = pnfs_read_resend_pnfs(hdr); | ||
2026 | if (trypnfs == PNFS_NOT_ATTEMPTED || err) | ||
1732 | pnfs_read_through_mds(desc, hdr); | 2027 | pnfs_read_through_mds(desc, hdr); |
1733 | pnfs_put_lseg(lseg); | ||
1734 | } | 2028 | } |
1735 | 2029 | ||
1736 | static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) | 2030 | static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) |
@@ -1743,24 +2037,20 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); | |||
1743 | int | 2037 | int |
1744 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | 2038 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) |
1745 | { | 2039 | { |
2040 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | ||
2041 | |||
1746 | struct nfs_pgio_header *hdr; | 2042 | struct nfs_pgio_header *hdr; |
1747 | int ret; | 2043 | int ret; |
1748 | 2044 | ||
1749 | hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); | 2045 | hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); |
1750 | if (!hdr) { | 2046 | if (!hdr) { |
1751 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | 2047 | desc->pg_completion_ops->error_cleanup(&mirror->pg_list); |
1752 | ret = -ENOMEM; | 2048 | return -ENOMEM; |
1753 | pnfs_put_lseg(desc->pg_lseg); | ||
1754 | desc->pg_lseg = NULL; | ||
1755 | return ret; | ||
1756 | } | 2049 | } |
1757 | nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); | 2050 | nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); |
1758 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); | 2051 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); |
1759 | ret = nfs_generic_pgio(desc, hdr); | 2052 | ret = nfs_generic_pgio(desc, hdr); |
1760 | if (ret != 0) { | 2053 | if (!ret) |
1761 | pnfs_put_lseg(desc->pg_lseg); | ||
1762 | desc->pg_lseg = NULL; | ||
1763 | } else | ||
1764 | pnfs_do_read(desc, hdr); | 2054 | pnfs_do_read(desc, hdr); |
1765 | return ret; | 2055 | return ret; |
1766 | } | 2056 | } |
@@ -1966,6 +2256,7 @@ clear_layoutcommitting: | |||
1966 | pnfs_clear_layoutcommitting(inode); | 2256 | pnfs_clear_layoutcommitting(inode); |
1967 | goto out; | 2257 | goto out; |
1968 | } | 2258 | } |
2259 | EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); | ||
1969 | 2260 | ||
1970 | struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) | 2261 | struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) |
1971 | { | 2262 | { |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 9ae5b765b073..797cd6253adf 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -38,6 +38,25 @@ enum { | |||
38 | NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ | 38 | NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ |
39 | NFS_LSEG_ROC, /* roc bit received from server */ | 39 | NFS_LSEG_ROC, /* roc bit received from server */ |
40 | NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */ | 40 | NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */ |
41 | NFS_LSEG_LAYOUTRETURN, /* layoutreturn bit set for layoutreturn */ | ||
42 | }; | ||
43 | |||
44 | /* Individual ip address */ | ||
45 | struct nfs4_pnfs_ds_addr { | ||
46 | struct sockaddr_storage da_addr; | ||
47 | size_t da_addrlen; | ||
48 | struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ | ||
49 | char *da_remotestr; /* human readable addr+port */ | ||
50 | }; | ||
51 | |||
52 | struct nfs4_pnfs_ds { | ||
53 | struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ | ||
54 | char *ds_remotestr; /* comma sep list of addrs */ | ||
55 | struct list_head ds_addrs; | ||
56 | struct nfs_client *ds_clp; | ||
57 | atomic_t ds_count; | ||
58 | unsigned long ds_state; | ||
59 | #define NFS4DS_CONNECTING 0 /* ds is establishing connection */ | ||
41 | }; | 60 | }; |
42 | 61 | ||
43 | struct pnfs_layout_segment { | 62 | struct pnfs_layout_segment { |
@@ -53,19 +72,34 @@ struct pnfs_layout_segment { | |||
53 | enum pnfs_try_status { | 72 | enum pnfs_try_status { |
54 | PNFS_ATTEMPTED = 0, | 73 | PNFS_ATTEMPTED = 0, |
55 | PNFS_NOT_ATTEMPTED = 1, | 74 | PNFS_NOT_ATTEMPTED = 1, |
75 | PNFS_TRY_AGAIN = 2, | ||
56 | }; | 76 | }; |
57 | 77 | ||
58 | #ifdef CONFIG_NFS_V4_1 | 78 | #ifdef CONFIG_NFS_V4_1 |
59 | 79 | ||
60 | #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" | 80 | #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" |
61 | 81 | ||
82 | /* | ||
83 | * Default data server connection timeout and retrans vaules. | ||
84 | * Set by module parameters dataserver_timeo and dataserver_retrans. | ||
85 | */ | ||
86 | #define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */ | ||
87 | #define NFS4_DEF_DS_RETRANS 5 | ||
88 | |||
89 | /* error codes for internal use */ | ||
90 | #define NFS4ERR_RESET_TO_MDS 12001 | ||
91 | #define NFS4ERR_RESET_TO_PNFS 12002 | ||
92 | |||
62 | enum { | 93 | enum { |
63 | NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ | 94 | NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ |
64 | NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ | 95 | NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ |
65 | NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ | 96 | NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ |
66 | NFS_LAYOUT_ROC, /* some lseg had roc bit set */ | 97 | NFS_LAYOUT_ROC, /* some lseg had roc bit set */ |
67 | NFS_LAYOUT_RETURN, /* Return this layout ASAP */ | 98 | NFS_LAYOUT_RETURN, /* Return this layout ASAP */ |
99 | NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */ | ||
68 | NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ | 100 | NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ |
101 | NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ | ||
102 | NFS_LAYOUT_RETRY_LAYOUTGET, /* Retry layoutget */ | ||
69 | }; | 103 | }; |
70 | 104 | ||
71 | enum layoutdriver_policy_flags { | 105 | enum layoutdriver_policy_flags { |
@@ -106,7 +140,8 @@ struct pnfs_layoutdriver_type { | |||
106 | struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode); | 140 | struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode); |
107 | void (*mark_request_commit) (struct nfs_page *req, | 141 | void (*mark_request_commit) (struct nfs_page *req, |
108 | struct pnfs_layout_segment *lseg, | 142 | struct pnfs_layout_segment *lseg, |
109 | struct nfs_commit_info *cinfo); | 143 | struct nfs_commit_info *cinfo, |
144 | u32 ds_commit_idx); | ||
110 | void (*clear_request_commit) (struct nfs_page *req, | 145 | void (*clear_request_commit) (struct nfs_page *req, |
111 | struct nfs_commit_info *cinfo); | 146 | struct nfs_commit_info *cinfo); |
112 | int (*scan_commit_lists) (struct nfs_commit_info *cinfo, | 147 | int (*scan_commit_lists) (struct nfs_commit_info *cinfo, |
@@ -154,6 +189,7 @@ struct pnfs_layout_hdr { | |||
154 | u32 plh_barrier; /* ignore lower seqids */ | 189 | u32 plh_barrier; /* ignore lower seqids */ |
155 | unsigned long plh_retry_timestamp; | 190 | unsigned long plh_retry_timestamp; |
156 | unsigned long plh_flags; | 191 | unsigned long plh_flags; |
192 | enum pnfs_iomode plh_return_iomode; | ||
157 | loff_t plh_lwb; /* last write byte for layoutcommit */ | 193 | loff_t plh_lwb; /* last write byte for layoutcommit */ |
158 | struct rpc_cred *plh_lc_cred; /* layoutcommit cred */ | 194 | struct rpc_cred *plh_lc_cred; /* layoutcommit cred */ |
159 | struct inode *plh_inode; | 195 | struct inode *plh_inode; |
@@ -185,7 +221,7 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | |||
185 | struct pnfs_device *dev, | 221 | struct pnfs_device *dev, |
186 | struct rpc_cred *cred); | 222 | struct rpc_cred *cred); |
187 | extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); | 223 | extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); |
188 | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); | 224 | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); |
189 | 225 | ||
190 | /* pnfs.c */ | 226 | /* pnfs.c */ |
191 | void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); | 227 | void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); |
@@ -198,6 +234,7 @@ void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page * | |||
198 | int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); | 234 | int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); |
199 | void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | 235 | void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, |
200 | struct nfs_page *req, u64 wb_size); | 236 | struct nfs_page *req, u64 wb_size); |
237 | void pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *); | ||
201 | int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); | 238 | int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); |
202 | size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, | 239 | size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, |
203 | struct nfs_page *prev, struct nfs_page *req); | 240 | struct nfs_page *prev, struct nfs_page *req); |
@@ -217,6 +254,7 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, | |||
217 | bool update_barrier); | 254 | bool update_barrier); |
218 | int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, | 255 | int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, |
219 | struct pnfs_layout_hdr *lo, | 256 | struct pnfs_layout_hdr *lo, |
257 | struct pnfs_layout_range *range, | ||
220 | struct nfs4_state *open_state); | 258 | struct nfs4_state *open_state); |
221 | int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | 259 | int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, |
222 | struct list_head *tmp_list, | 260 | struct list_head *tmp_list, |
@@ -233,17 +271,21 @@ int _pnfs_return_layout(struct inode *); | |||
233 | int pnfs_commit_and_return_layout(struct inode *); | 271 | int pnfs_commit_and_return_layout(struct inode *); |
234 | void pnfs_ld_write_done(struct nfs_pgio_header *); | 272 | void pnfs_ld_write_done(struct nfs_pgio_header *); |
235 | void pnfs_ld_read_done(struct nfs_pgio_header *); | 273 | void pnfs_ld_read_done(struct nfs_pgio_header *); |
274 | int pnfs_read_resend_pnfs(struct nfs_pgio_header *); | ||
236 | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, | 275 | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, |
237 | struct nfs_open_context *ctx, | 276 | struct nfs_open_context *ctx, |
238 | loff_t pos, | 277 | loff_t pos, |
239 | u64 count, | 278 | u64 count, |
240 | enum pnfs_iomode iomode, | 279 | enum pnfs_iomode iomode, |
241 | gfp_t gfp_flags); | 280 | gfp_t gfp_flags); |
281 | void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo); | ||
242 | 282 | ||
243 | void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); | 283 | void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); |
244 | int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *); | 284 | int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *); |
245 | int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); | 285 | int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); |
246 | struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); | 286 | struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); |
287 | void pnfs_error_mark_layout_for_return(struct inode *inode, | ||
288 | struct pnfs_layout_segment *lseg); | ||
247 | 289 | ||
248 | /* nfs4_deviceid_flags */ | 290 | /* nfs4_deviceid_flags */ |
249 | enum { | 291 | enum { |
@@ -275,6 +317,39 @@ void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); | |||
275 | bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); | 317 | bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); |
276 | void nfs4_deviceid_purge_client(const struct nfs_client *); | 318 | void nfs4_deviceid_purge_client(const struct nfs_client *); |
277 | 319 | ||
320 | /* pnfs_nfs.c */ | ||
321 | void pnfs_generic_clear_request_commit(struct nfs_page *req, | ||
322 | struct nfs_commit_info *cinfo); | ||
323 | void pnfs_generic_commit_release(void *calldata); | ||
324 | void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data); | ||
325 | void pnfs_generic_rw_release(void *data); | ||
326 | void pnfs_generic_recover_commit_reqs(struct list_head *dst, | ||
327 | struct nfs_commit_info *cinfo); | ||
328 | int pnfs_generic_commit_pagelist(struct inode *inode, | ||
329 | struct list_head *mds_pages, | ||
330 | int how, | ||
331 | struct nfs_commit_info *cinfo, | ||
332 | int (*initiate_commit)(struct nfs_commit_data *data, | ||
333 | int how)); | ||
334 | int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max); | ||
335 | void pnfs_generic_write_commit_done(struct rpc_task *task, void *data); | ||
336 | void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); | ||
337 | struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, | ||
338 | gfp_t gfp_flags); | ||
339 | void nfs4_pnfs_v3_ds_connect_unload(void); | ||
340 | void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, | ||
341 | struct nfs4_deviceid_node *devid, unsigned int timeo, | ||
342 | unsigned int retrans, u32 version, u32 minor_version, | ||
343 | rpc_authflavor_t au_flavor); | ||
344 | struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net, | ||
345 | struct xdr_stream *xdr, | ||
346 | gfp_t gfp_flags); | ||
347 | |||
348 | static inline bool nfs_have_layout(struct inode *inode) | ||
349 | { | ||
350 | return NFS_I(inode)->layout != NULL; | ||
351 | } | ||
352 | |||
278 | static inline struct nfs4_deviceid_node * | 353 | static inline struct nfs4_deviceid_node * |
279 | nfs4_get_deviceid(struct nfs4_deviceid_node *d) | 354 | nfs4_get_deviceid(struct nfs4_deviceid_node *d) |
280 | { | 355 | { |
@@ -282,6 +357,26 @@ nfs4_get_deviceid(struct nfs4_deviceid_node *d) | |||
282 | return d; | 357 | return d; |
283 | } | 358 | } |
284 | 359 | ||
360 | static inline void pnfs_set_retry_layoutget(struct pnfs_layout_hdr *lo) | ||
361 | { | ||
362 | if (!test_and_set_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) | ||
363 | atomic_inc(&lo->plh_refcount); | ||
364 | } | ||
365 | |||
366 | static inline void pnfs_clear_retry_layoutget(struct pnfs_layout_hdr *lo) | ||
367 | { | ||
368 | if (test_and_clear_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) { | ||
369 | atomic_dec(&lo->plh_refcount); | ||
370 | /* wake up waiters for LAYOUTRETURN as that is not needed */ | ||
371 | wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); | ||
372 | } | ||
373 | } | ||
374 | |||
375 | static inline bool pnfs_should_retry_layoutget(struct pnfs_layout_hdr *lo) | ||
376 | { | ||
377 | return test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags); | ||
378 | } | ||
379 | |||
285 | static inline struct pnfs_layout_segment * | 380 | static inline struct pnfs_layout_segment * |
286 | pnfs_get_lseg(struct pnfs_layout_segment *lseg) | 381 | pnfs_get_lseg(struct pnfs_layout_segment *lseg) |
287 | { | 382 | { |
@@ -317,16 +412,22 @@ pnfs_get_ds_info(struct inode *inode) | |||
317 | return ld->get_ds_info(inode); | 412 | return ld->get_ds_info(inode); |
318 | } | 413 | } |
319 | 414 | ||
415 | static inline void | ||
416 | pnfs_generic_mark_devid_invalid(struct nfs4_deviceid_node *node) | ||
417 | { | ||
418 | set_bit(NFS_DEVICEID_INVALID, &node->flags); | ||
419 | } | ||
420 | |||
320 | static inline bool | 421 | static inline bool |
321 | pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, | 422 | pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, |
322 | struct nfs_commit_info *cinfo) | 423 | struct nfs_commit_info *cinfo, u32 ds_commit_idx) |
323 | { | 424 | { |
324 | struct inode *inode = req->wb_context->dentry->d_inode; | 425 | struct inode *inode = req->wb_context->dentry->d_inode; |
325 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; | 426 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; |
326 | 427 | ||
327 | if (lseg == NULL || ld->mark_request_commit == NULL) | 428 | if (lseg == NULL || ld->mark_request_commit == NULL) |
328 | return false; | 429 | return false; |
329 | ld->mark_request_commit(req, lseg, cinfo); | 430 | ld->mark_request_commit(req, lseg, cinfo, ds_commit_idx); |
330 | return true; | 431 | return true; |
331 | } | 432 | } |
332 | 433 | ||
@@ -352,15 +453,6 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, | |||
352 | return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max); | 453 | return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max); |
353 | } | 454 | } |
354 | 455 | ||
355 | static inline void | ||
356 | pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, | ||
357 | struct nfs_commit_info *cinfo) | ||
358 | { | ||
359 | if (cinfo->ds == NULL || cinfo->ds->nwritten == 0) | ||
360 | return; | ||
361 | NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); | ||
362 | } | ||
363 | |||
364 | static inline struct nfs_page * | 456 | static inline struct nfs_page * |
365 | pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, | 457 | pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, |
366 | struct page *page) | 458 | struct page *page) |
@@ -427,6 +519,11 @@ static inline void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id) | |||
427 | #endif /* NFS_DEBUG */ | 519 | #endif /* NFS_DEBUG */ |
428 | #else /* CONFIG_NFS_V4_1 */ | 520 | #else /* CONFIG_NFS_V4_1 */ |
429 | 521 | ||
522 | static inline bool nfs_have_layout(struct inode *inode) | ||
523 | { | ||
524 | return false; | ||
525 | } | ||
526 | |||
430 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) | 527 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) |
431 | { | 528 | { |
432 | } | 529 | } |
@@ -513,7 +610,7 @@ pnfs_get_ds_info(struct inode *inode) | |||
513 | 610 | ||
514 | static inline bool | 611 | static inline bool |
515 | pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, | 612 | pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, |
516 | struct nfs_commit_info *cinfo) | 613 | struct nfs_commit_info *cinfo, u32 ds_commit_idx) |
517 | { | 614 | { |
518 | return false; | 615 | return false; |
519 | } | 616 | } |
@@ -531,12 +628,6 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, | |||
531 | return 0; | 628 | return 0; |
532 | } | 629 | } |
533 | 630 | ||
534 | static inline void | ||
535 | pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, | ||
536 | struct nfs_commit_info *cinfo) | ||
537 | { | ||
538 | } | ||
539 | |||
540 | static inline struct nfs_page * | 631 | static inline struct nfs_page * |
541 | pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, | 632 | pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, |
542 | struct page *page) | 633 | struct page *page) |
@@ -568,6 +659,10 @@ static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) | |||
568 | return NULL; | 659 | return NULL; |
569 | } | 660 | } |
570 | 661 | ||
662 | static inline void nfs4_pnfs_v3_ds_connect_unload(void) | ||
663 | { | ||
664 | } | ||
665 | |||
571 | #endif /* CONFIG_NFS_V4_1 */ | 666 | #endif /* CONFIG_NFS_V4_1 */ |
572 | 667 | ||
573 | #endif /* FS_NFS_PNFS_H */ | 668 | #endif /* FS_NFS_PNFS_H */ |
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c new file mode 100644 index 000000000000..fdc4f6562bb7 --- /dev/null +++ b/fs/nfs/pnfs_nfs.c | |||
@@ -0,0 +1,840 @@ | |||
1 | /* | ||
2 | * Common NFS I/O operations for the pnfs file based | ||
3 | * layout drivers. | ||
4 | * | ||
5 | * Copyright (c) 2014, Primary Data, Inc. All rights reserved. | ||
6 | * | ||
7 | * Tom Haynes <loghyr@primarydata.com> | ||
8 | */ | ||
9 | |||
10 | #include <linux/nfs_fs.h> | ||
11 | #include <linux/nfs_page.h> | ||
12 | #include <linux/sunrpc/addr.h> | ||
13 | #include <linux/module.h> | ||
14 | |||
15 | #include "nfs4session.h" | ||
16 | #include "internal.h" | ||
17 | #include "pnfs.h" | ||
18 | |||
19 | #define NFSDBG_FACILITY NFSDBG_PNFS | ||
20 | |||
21 | void pnfs_generic_rw_release(void *data) | ||
22 | { | ||
23 | struct nfs_pgio_header *hdr = data; | ||
24 | |||
25 | nfs_put_client(hdr->ds_clp); | ||
26 | hdr->mds_ops->rpc_release(data); | ||
27 | } | ||
28 | EXPORT_SYMBOL_GPL(pnfs_generic_rw_release); | ||
29 | |||
30 | /* Fake up some data that will cause nfs_commit_release to retry the writes. */ | ||
31 | void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data) | ||
32 | { | ||
33 | struct nfs_page *first = nfs_list_entry(data->pages.next); | ||
34 | |||
35 | data->task.tk_status = 0; | ||
36 | memcpy(&data->verf.verifier, &first->wb_verf, | ||
37 | sizeof(data->verf.verifier)); | ||
38 | data->verf.verifier.data[0]++; /* ensure verifier mismatch */ | ||
39 | } | ||
40 | EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes); | ||
41 | |||
42 | void pnfs_generic_write_commit_done(struct rpc_task *task, void *data) | ||
43 | { | ||
44 | struct nfs_commit_data *wdata = data; | ||
45 | |||
46 | /* Note this may cause RPC to be resent */ | ||
47 | wdata->mds_ops->rpc_call_done(task, data); | ||
48 | } | ||
49 | EXPORT_SYMBOL_GPL(pnfs_generic_write_commit_done); | ||
50 | |||
51 | void pnfs_generic_commit_release(void *calldata) | ||
52 | { | ||
53 | struct nfs_commit_data *data = calldata; | ||
54 | |||
55 | data->completion_ops->completion(data); | ||
56 | pnfs_put_lseg(data->lseg); | ||
57 | nfs_put_client(data->ds_clp); | ||
58 | nfs_commitdata_release(data); | ||
59 | } | ||
60 | EXPORT_SYMBOL_GPL(pnfs_generic_commit_release); | ||
61 | |||
62 | /* The generic layer is about to remove the req from the commit list. | ||
63 | * If this will make the bucket empty, it will need to put the lseg reference. | ||
64 | * Note this must be called holding the inode (/cinfo) lock | ||
65 | */ | ||
66 | void | ||
67 | pnfs_generic_clear_request_commit(struct nfs_page *req, | ||
68 | struct nfs_commit_info *cinfo) | ||
69 | { | ||
70 | struct pnfs_layout_segment *freeme = NULL; | ||
71 | |||
72 | if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) | ||
73 | goto out; | ||
74 | cinfo->ds->nwritten--; | ||
75 | if (list_is_singular(&req->wb_list)) { | ||
76 | struct pnfs_commit_bucket *bucket; | ||
77 | |||
78 | bucket = list_first_entry(&req->wb_list, | ||
79 | struct pnfs_commit_bucket, | ||
80 | written); | ||
81 | freeme = bucket->wlseg; | ||
82 | bucket->wlseg = NULL; | ||
83 | } | ||
84 | out: | ||
85 | nfs_request_remove_commit_list(req, cinfo); | ||
86 | pnfs_put_lseg_locked(freeme); | ||
87 | } | ||
88 | EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit); | ||
89 | |||
90 | static int | ||
91 | pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, | ||
92 | struct nfs_commit_info *cinfo, int max) | ||
93 | { | ||
94 | struct nfs_page *req, *tmp; | ||
95 | int ret = 0; | ||
96 | |||
97 | list_for_each_entry_safe(req, tmp, src, wb_list) { | ||
98 | if (!nfs_lock_request(req)) | ||
99 | continue; | ||
100 | kref_get(&req->wb_kref); | ||
101 | if (cond_resched_lock(cinfo->lock)) | ||
102 | list_safe_reset_next(req, tmp, wb_list); | ||
103 | nfs_request_remove_commit_list(req, cinfo); | ||
104 | clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); | ||
105 | nfs_list_add_request(req, dst); | ||
106 | ret++; | ||
107 | if ((ret == max) && !cinfo->dreq) | ||
108 | break; | ||
109 | } | ||
110 | return ret; | ||
111 | } | ||
112 | |||
113 | static int | ||
114 | pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, | ||
115 | struct nfs_commit_info *cinfo, | ||
116 | int max) | ||
117 | { | ||
118 | struct list_head *src = &bucket->written; | ||
119 | struct list_head *dst = &bucket->committing; | ||
120 | int ret; | ||
121 | |||
122 | lockdep_assert_held(cinfo->lock); | ||
123 | ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max); | ||
124 | if (ret) { | ||
125 | cinfo->ds->nwritten -= ret; | ||
126 | cinfo->ds->ncommitting += ret; | ||
127 | bucket->clseg = bucket->wlseg; | ||
128 | if (list_empty(src)) | ||
129 | bucket->wlseg = NULL; | ||
130 | else | ||
131 | pnfs_get_lseg(bucket->clseg); | ||
132 | } | ||
133 | return ret; | ||
134 | } | ||
135 | |||
136 | /* Move reqs from written to committing lists, returning count | ||
137 | * of number moved. | ||
138 | */ | ||
139 | int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, | ||
140 | int max) | ||
141 | { | ||
142 | int i, rv = 0, cnt; | ||
143 | |||
144 | lockdep_assert_held(cinfo->lock); | ||
145 | for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { | ||
146 | cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i], | ||
147 | cinfo, max); | ||
148 | max -= cnt; | ||
149 | rv += cnt; | ||
150 | } | ||
151 | return rv; | ||
152 | } | ||
153 | EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists); | ||
154 | |||
155 | /* Pull everything off the committing lists and dump into @dst. */ | ||
156 | void pnfs_generic_recover_commit_reqs(struct list_head *dst, | ||
157 | struct nfs_commit_info *cinfo) | ||
158 | { | ||
159 | struct pnfs_commit_bucket *b; | ||
160 | struct pnfs_layout_segment *freeme; | ||
161 | int i; | ||
162 | |||
163 | lockdep_assert_held(cinfo->lock); | ||
164 | restart: | ||
165 | for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { | ||
166 | if (pnfs_generic_transfer_commit_list(&b->written, dst, | ||
167 | cinfo, 0)) { | ||
168 | freeme = b->wlseg; | ||
169 | b->wlseg = NULL; | ||
170 | spin_unlock(cinfo->lock); | ||
171 | pnfs_put_lseg(freeme); | ||
172 | spin_lock(cinfo->lock); | ||
173 | goto restart; | ||
174 | } | ||
175 | } | ||
176 | cinfo->ds->nwritten = 0; | ||
177 | } | ||
178 | EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); | ||
179 | |||
180 | static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) | ||
181 | { | ||
182 | struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; | ||
183 | struct pnfs_commit_bucket *bucket; | ||
184 | struct pnfs_layout_segment *freeme; | ||
185 | int i; | ||
186 | |||
187 | for (i = idx; i < fl_cinfo->nbuckets; i++) { | ||
188 | bucket = &fl_cinfo->buckets[i]; | ||
189 | if (list_empty(&bucket->committing)) | ||
190 | continue; | ||
191 | nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo, i); | ||
192 | spin_lock(cinfo->lock); | ||
193 | freeme = bucket->clseg; | ||
194 | bucket->clseg = NULL; | ||
195 | spin_unlock(cinfo->lock); | ||
196 | pnfs_put_lseg(freeme); | ||
197 | } | ||
198 | } | ||
199 | |||
200 | static unsigned int | ||
201 | pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo, | ||
202 | struct list_head *list) | ||
203 | { | ||
204 | struct pnfs_ds_commit_info *fl_cinfo; | ||
205 | struct pnfs_commit_bucket *bucket; | ||
206 | struct nfs_commit_data *data; | ||
207 | int i; | ||
208 | unsigned int nreq = 0; | ||
209 | |||
210 | fl_cinfo = cinfo->ds; | ||
211 | bucket = fl_cinfo->buckets; | ||
212 | for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { | ||
213 | if (list_empty(&bucket->committing)) | ||
214 | continue; | ||
215 | data = nfs_commitdata_alloc(); | ||
216 | if (!data) | ||
217 | break; | ||
218 | data->ds_commit_index = i; | ||
219 | spin_lock(cinfo->lock); | ||
220 | data->lseg = bucket->clseg; | ||
221 | bucket->clseg = NULL; | ||
222 | spin_unlock(cinfo->lock); | ||
223 | list_add(&data->pages, list); | ||
224 | nreq++; | ||
225 | } | ||
226 | |||
227 | /* Clean up on error */ | ||
228 | pnfs_generic_retry_commit(cinfo, i); | ||
229 | return nreq; | ||
230 | } | ||
231 | |||
232 | /* This follows nfs_commit_list pretty closely */ | ||
233 | int | ||
234 | pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | ||
235 | int how, struct nfs_commit_info *cinfo, | ||
236 | int (*initiate_commit)(struct nfs_commit_data *data, | ||
237 | int how)) | ||
238 | { | ||
239 | struct nfs_commit_data *data, *tmp; | ||
240 | LIST_HEAD(list); | ||
241 | unsigned int nreq = 0; | ||
242 | |||
243 | if (!list_empty(mds_pages)) { | ||
244 | data = nfs_commitdata_alloc(); | ||
245 | if (data != NULL) { | ||
246 | data->lseg = NULL; | ||
247 | list_add(&data->pages, &list); | ||
248 | nreq++; | ||
249 | } else { | ||
250 | nfs_retry_commit(mds_pages, NULL, cinfo, 0); | ||
251 | pnfs_generic_retry_commit(cinfo, 0); | ||
252 | cinfo->completion_ops->error_cleanup(NFS_I(inode)); | ||
253 | return -ENOMEM; | ||
254 | } | ||
255 | } | ||
256 | |||
257 | nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); | ||
258 | |||
259 | if (nreq == 0) { | ||
260 | cinfo->completion_ops->error_cleanup(NFS_I(inode)); | ||
261 | goto out; | ||
262 | } | ||
263 | |||
264 | atomic_add(nreq, &cinfo->mds->rpcs_out); | ||
265 | |||
266 | list_for_each_entry_safe(data, tmp, &list, pages) { | ||
267 | list_del_init(&data->pages); | ||
268 | if (!data->lseg) { | ||
269 | nfs_init_commit(data, mds_pages, NULL, cinfo); | ||
270 | nfs_initiate_commit(NFS_CLIENT(inode), data, | ||
271 | NFS_PROTO(data->inode), | ||
272 | data->mds_ops, how, 0); | ||
273 | } else { | ||
274 | struct pnfs_commit_bucket *buckets; | ||
275 | |||
276 | buckets = cinfo->ds->buckets; | ||
277 | nfs_init_commit(data, | ||
278 | &buckets[data->ds_commit_index].committing, | ||
279 | data->lseg, | ||
280 | cinfo); | ||
281 | initiate_commit(data, how); | ||
282 | } | ||
283 | } | ||
284 | out: | ||
285 | cinfo->ds->ncommitting = 0; | ||
286 | return PNFS_ATTEMPTED; | ||
287 | } | ||
288 | EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist); | ||
289 | |||
290 | /* | ||
291 | * Data server cache | ||
292 | * | ||
293 | * Data servers can be mapped to different device ids. | ||
294 | * nfs4_pnfs_ds reference counting | ||
295 | * - set to 1 on allocation | ||
296 | * - incremented when a device id maps a data server already in the cache. | ||
297 | * - decremented when deviceid is removed from the cache. | ||
298 | */ | ||
299 | static DEFINE_SPINLOCK(nfs4_ds_cache_lock); | ||
300 | static LIST_HEAD(nfs4_data_server_cache); | ||
301 | |||
302 | /* Debug routines */ | ||
303 | static void | ||
304 | print_ds(struct nfs4_pnfs_ds *ds) | ||
305 | { | ||
306 | if (ds == NULL) { | ||
307 | printk(KERN_WARNING "%s NULL device\n", __func__); | ||
308 | return; | ||
309 | } | ||
310 | printk(KERN_WARNING " ds %s\n" | ||
311 | " ref count %d\n" | ||
312 | " client %p\n" | ||
313 | " cl_exchange_flags %x\n", | ||
314 | ds->ds_remotestr, | ||
315 | atomic_read(&ds->ds_count), ds->ds_clp, | ||
316 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); | ||
317 | } | ||
318 | |||
319 | static bool | ||
320 | same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) | ||
321 | { | ||
322 | struct sockaddr_in *a, *b; | ||
323 | struct sockaddr_in6 *a6, *b6; | ||
324 | |||
325 | if (addr1->sa_family != addr2->sa_family) | ||
326 | return false; | ||
327 | |||
328 | switch (addr1->sa_family) { | ||
329 | case AF_INET: | ||
330 | a = (struct sockaddr_in *)addr1; | ||
331 | b = (struct sockaddr_in *)addr2; | ||
332 | |||
333 | if (a->sin_addr.s_addr == b->sin_addr.s_addr && | ||
334 | a->sin_port == b->sin_port) | ||
335 | return true; | ||
336 | break; | ||
337 | |||
338 | case AF_INET6: | ||
339 | a6 = (struct sockaddr_in6 *)addr1; | ||
340 | b6 = (struct sockaddr_in6 *)addr2; | ||
341 | |||
342 | /* LINKLOCAL addresses must have matching scope_id */ | ||
343 | if (ipv6_addr_src_scope(&a6->sin6_addr) == | ||
344 | IPV6_ADDR_SCOPE_LINKLOCAL && | ||
345 | a6->sin6_scope_id != b6->sin6_scope_id) | ||
346 | return false; | ||
347 | |||
348 | if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && | ||
349 | a6->sin6_port == b6->sin6_port) | ||
350 | return true; | ||
351 | break; | ||
352 | |||
353 | default: | ||
354 | dprintk("%s: unhandled address family: %u\n", | ||
355 | __func__, addr1->sa_family); | ||
356 | return false; | ||
357 | } | ||
358 | |||
359 | return false; | ||
360 | } | ||
361 | |||
362 | static bool | ||
363 | _same_data_server_addrs_locked(const struct list_head *dsaddrs1, | ||
364 | const struct list_head *dsaddrs2) | ||
365 | { | ||
366 | struct nfs4_pnfs_ds_addr *da1, *da2; | ||
367 | |||
368 | /* step through both lists, comparing as we go */ | ||
369 | for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), | ||
370 | da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); | ||
371 | da1 != NULL && da2 != NULL; | ||
372 | da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), | ||
373 | da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { | ||
374 | if (!same_sockaddr((struct sockaddr *)&da1->da_addr, | ||
375 | (struct sockaddr *)&da2->da_addr)) | ||
376 | return false; | ||
377 | } | ||
378 | if (da1 == NULL && da2 == NULL) | ||
379 | return true; | ||
380 | |||
381 | return false; | ||
382 | } | ||
383 | |||
384 | /* | ||
385 | * Lookup DS by addresses. nfs4_ds_cache_lock is held | ||
386 | */ | ||
387 | static struct nfs4_pnfs_ds * | ||
388 | _data_server_lookup_locked(const struct list_head *dsaddrs) | ||
389 | { | ||
390 | struct nfs4_pnfs_ds *ds; | ||
391 | |||
392 | list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) | ||
393 | if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) | ||
394 | return ds; | ||
395 | return NULL; | ||
396 | } | ||
397 | |||
398 | static void destroy_ds(struct nfs4_pnfs_ds *ds) | ||
399 | { | ||
400 | struct nfs4_pnfs_ds_addr *da; | ||
401 | |||
402 | dprintk("--> %s\n", __func__); | ||
403 | ifdebug(FACILITY) | ||
404 | print_ds(ds); | ||
405 | |||
406 | nfs_put_client(ds->ds_clp); | ||
407 | |||
408 | while (!list_empty(&ds->ds_addrs)) { | ||
409 | da = list_first_entry(&ds->ds_addrs, | ||
410 | struct nfs4_pnfs_ds_addr, | ||
411 | da_node); | ||
412 | list_del_init(&da->da_node); | ||
413 | kfree(da->da_remotestr); | ||
414 | kfree(da); | ||
415 | } | ||
416 | |||
417 | kfree(ds->ds_remotestr); | ||
418 | kfree(ds); | ||
419 | } | ||
420 | |||
421 | void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds) | ||
422 | { | ||
423 | if (atomic_dec_and_lock(&ds->ds_count, | ||
424 | &nfs4_ds_cache_lock)) { | ||
425 | list_del_init(&ds->ds_node); | ||
426 | spin_unlock(&nfs4_ds_cache_lock); | ||
427 | destroy_ds(ds); | ||
428 | } | ||
429 | } | ||
430 | EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_put); | ||
431 | |||
432 | /* | ||
433 | * Create a string with a human readable address and port to avoid | ||
434 | * complicated setup around many dprinks. | ||
435 | */ | ||
436 | static char * | ||
437 | nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) | ||
438 | { | ||
439 | struct nfs4_pnfs_ds_addr *da; | ||
440 | char *remotestr; | ||
441 | size_t len; | ||
442 | char *p; | ||
443 | |||
444 | len = 3; /* '{', '}' and eol */ | ||
445 | list_for_each_entry(da, dsaddrs, da_node) { | ||
446 | len += strlen(da->da_remotestr) + 1; /* string plus comma */ | ||
447 | } | ||
448 | |||
449 | remotestr = kzalloc(len, gfp_flags); | ||
450 | if (!remotestr) | ||
451 | return NULL; | ||
452 | |||
453 | p = remotestr; | ||
454 | *(p++) = '{'; | ||
455 | len--; | ||
456 | list_for_each_entry(da, dsaddrs, da_node) { | ||
457 | size_t ll = strlen(da->da_remotestr); | ||
458 | |||
459 | if (ll > len) | ||
460 | goto out_err; | ||
461 | |||
462 | memcpy(p, da->da_remotestr, ll); | ||
463 | p += ll; | ||
464 | len -= ll; | ||
465 | |||
466 | if (len < 1) | ||
467 | goto out_err; | ||
468 | (*p++) = ','; | ||
469 | len--; | ||
470 | } | ||
471 | if (len < 2) | ||
472 | goto out_err; | ||
473 | *(p++) = '}'; | ||
474 | *p = '\0'; | ||
475 | return remotestr; | ||
476 | out_err: | ||
477 | kfree(remotestr); | ||
478 | return NULL; | ||
479 | } | ||
480 | |||
481 | /* | ||
482 | * Given a list of multipath struct nfs4_pnfs_ds_addr, add it to ds cache if | ||
483 | * uncached and return cached struct nfs4_pnfs_ds. | ||
484 | */ | ||
485 | struct nfs4_pnfs_ds * | ||
486 | nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) | ||
487 | { | ||
488 | struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; | ||
489 | char *remotestr; | ||
490 | |||
491 | if (list_empty(dsaddrs)) { | ||
492 | dprintk("%s: no addresses defined\n", __func__); | ||
493 | goto out; | ||
494 | } | ||
495 | |||
496 | ds = kzalloc(sizeof(*ds), gfp_flags); | ||
497 | if (!ds) | ||
498 | goto out; | ||
499 | |||
500 | /* this is only used for debugging, so it's ok if its NULL */ | ||
501 | remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); | ||
502 | |||
503 | spin_lock(&nfs4_ds_cache_lock); | ||
504 | tmp_ds = _data_server_lookup_locked(dsaddrs); | ||
505 | if (tmp_ds == NULL) { | ||
506 | INIT_LIST_HEAD(&ds->ds_addrs); | ||
507 | list_splice_init(dsaddrs, &ds->ds_addrs); | ||
508 | ds->ds_remotestr = remotestr; | ||
509 | atomic_set(&ds->ds_count, 1); | ||
510 | INIT_LIST_HEAD(&ds->ds_node); | ||
511 | ds->ds_clp = NULL; | ||
512 | list_add(&ds->ds_node, &nfs4_data_server_cache); | ||
513 | dprintk("%s add new data server %s\n", __func__, | ||
514 | ds->ds_remotestr); | ||
515 | } else { | ||
516 | kfree(remotestr); | ||
517 | kfree(ds); | ||
518 | atomic_inc(&tmp_ds->ds_count); | ||
519 | dprintk("%s data server %s found, inc'ed ds_count to %d\n", | ||
520 | __func__, tmp_ds->ds_remotestr, | ||
521 | atomic_read(&tmp_ds->ds_count)); | ||
522 | ds = tmp_ds; | ||
523 | } | ||
524 | spin_unlock(&nfs4_ds_cache_lock); | ||
525 | out: | ||
526 | return ds; | ||
527 | } | ||
528 | EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add); | ||
529 | |||
530 | static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) | ||
531 | { | ||
532 | might_sleep(); | ||
533 | wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, | ||
534 | TASK_KILLABLE); | ||
535 | } | ||
536 | |||
537 | static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) | ||
538 | { | ||
539 | smp_mb__before_atomic(); | ||
540 | clear_bit(NFS4DS_CONNECTING, &ds->ds_state); | ||
541 | smp_mb__after_atomic(); | ||
542 | wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); | ||
543 | } | ||
544 | |||
545 | static struct nfs_client *(*get_v3_ds_connect)( | ||
546 | struct nfs_client *mds_clp, | ||
547 | const struct sockaddr *ds_addr, | ||
548 | int ds_addrlen, | ||
549 | int ds_proto, | ||
550 | unsigned int ds_timeo, | ||
551 | unsigned int ds_retrans, | ||
552 | rpc_authflavor_t au_flavor); | ||
553 | |||
554 | static bool load_v3_ds_connect(void) | ||
555 | { | ||
556 | if (!get_v3_ds_connect) { | ||
557 | get_v3_ds_connect = symbol_request(nfs3_set_ds_client); | ||
558 | WARN_ON_ONCE(!get_v3_ds_connect); | ||
559 | } | ||
560 | |||
561 | return(get_v3_ds_connect != NULL); | ||
562 | } | ||
563 | |||
564 | void __exit nfs4_pnfs_v3_ds_connect_unload(void) | ||
565 | { | ||
566 | if (get_v3_ds_connect) { | ||
567 | symbol_put(nfs3_set_ds_client); | ||
568 | get_v3_ds_connect = NULL; | ||
569 | } | ||
570 | } | ||
571 | EXPORT_SYMBOL_GPL(nfs4_pnfs_v3_ds_connect_unload); | ||
572 | |||
573 | static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, | ||
574 | struct nfs4_pnfs_ds *ds, | ||
575 | unsigned int timeo, | ||
576 | unsigned int retrans, | ||
577 | rpc_authflavor_t au_flavor) | ||
578 | { | ||
579 | struct nfs_client *clp = ERR_PTR(-EIO); | ||
580 | struct nfs4_pnfs_ds_addr *da; | ||
581 | int status = 0; | ||
582 | |||
583 | dprintk("--> %s DS %s au_flavor %d\n", __func__, | ||
584 | ds->ds_remotestr, au_flavor); | ||
585 | |||
586 | if (!load_v3_ds_connect()) | ||
587 | goto out; | ||
588 | |||
589 | list_for_each_entry(da, &ds->ds_addrs, da_node) { | ||
590 | dprintk("%s: DS %s: trying address %s\n", | ||
591 | __func__, ds->ds_remotestr, da->da_remotestr); | ||
592 | |||
593 | clp = get_v3_ds_connect(mds_srv->nfs_client, | ||
594 | (struct sockaddr *)&da->da_addr, | ||
595 | da->da_addrlen, IPPROTO_TCP, | ||
596 | timeo, retrans, au_flavor); | ||
597 | if (!IS_ERR(clp)) | ||
598 | break; | ||
599 | } | ||
600 | |||
601 | if (IS_ERR(clp)) { | ||
602 | status = PTR_ERR(clp); | ||
603 | goto out; | ||
604 | } | ||
605 | |||
606 | smp_wmb(); | ||
607 | ds->ds_clp = clp; | ||
608 | dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); | ||
609 | out: | ||
610 | return status; | ||
611 | } | ||
612 | |||
613 | static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, | ||
614 | struct nfs4_pnfs_ds *ds, | ||
615 | unsigned int timeo, | ||
616 | unsigned int retrans, | ||
617 | u32 minor_version, | ||
618 | rpc_authflavor_t au_flavor) | ||
619 | { | ||
620 | struct nfs_client *clp = ERR_PTR(-EIO); | ||
621 | struct nfs4_pnfs_ds_addr *da; | ||
622 | int status = 0; | ||
623 | |||
624 | dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, | ||
625 | au_flavor); | ||
626 | |||
627 | list_for_each_entry(da, &ds->ds_addrs, da_node) { | ||
628 | dprintk("%s: DS %s: trying address %s\n", | ||
629 | __func__, ds->ds_remotestr, da->da_remotestr); | ||
630 | |||
631 | clp = nfs4_set_ds_client(mds_srv->nfs_client, | ||
632 | (struct sockaddr *)&da->da_addr, | ||
633 | da->da_addrlen, IPPROTO_TCP, | ||
634 | timeo, retrans, minor_version, | ||
635 | au_flavor); | ||
636 | if (!IS_ERR(clp)) | ||
637 | break; | ||
638 | } | ||
639 | |||
640 | if (IS_ERR(clp)) { | ||
641 | status = PTR_ERR(clp); | ||
642 | goto out; | ||
643 | } | ||
644 | |||
645 | status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); | ||
646 | if (status) | ||
647 | goto out_put; | ||
648 | |||
649 | smp_wmb(); | ||
650 | ds->ds_clp = clp; | ||
651 | dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); | ||
652 | out: | ||
653 | return status; | ||
654 | out_put: | ||
655 | nfs_put_client(clp); | ||
656 | goto out; | ||
657 | } | ||
658 | |||
659 | /* | ||
660 | * Create an rpc connection to the nfs4_pnfs_ds data server. | ||
661 | * Currently only supports IPv4 and IPv6 addresses. | ||
662 | * If connection fails, make devid unavailable. | ||
663 | */ | ||
664 | void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, | ||
665 | struct nfs4_deviceid_node *devid, unsigned int timeo, | ||
666 | unsigned int retrans, u32 version, | ||
667 | u32 minor_version, rpc_authflavor_t au_flavor) | ||
668 | { | ||
669 | if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { | ||
670 | int err = 0; | ||
671 | |||
672 | if (version == 3) { | ||
673 | err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, | ||
674 | retrans, au_flavor); | ||
675 | } else if (version == 4) { | ||
676 | err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, | ||
677 | retrans, minor_version, | ||
678 | au_flavor); | ||
679 | } else { | ||
680 | dprintk("%s: unsupported DS version %d\n", __func__, | ||
681 | version); | ||
682 | err = -EPROTONOSUPPORT; | ||
683 | } | ||
684 | |||
685 | if (err) | ||
686 | nfs4_mark_deviceid_unavailable(devid); | ||
687 | nfs4_clear_ds_conn_bit(ds); | ||
688 | } else { | ||
689 | nfs4_wait_ds_connect(ds); | ||
690 | } | ||
691 | } | ||
692 | EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect); | ||
693 | |||
694 | /* | ||
695 | * Currently only supports ipv4, ipv6 and one multi-path address. | ||
696 | */ | ||
697 | struct nfs4_pnfs_ds_addr * | ||
698 | nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) | ||
699 | { | ||
700 | struct nfs4_pnfs_ds_addr *da = NULL; | ||
701 | char *buf, *portstr; | ||
702 | __be16 port; | ||
703 | int nlen, rlen; | ||
704 | int tmp[2]; | ||
705 | __be32 *p; | ||
706 | char *netid, *match_netid; | ||
707 | size_t len, match_netid_len; | ||
708 | char *startsep = ""; | ||
709 | char *endsep = ""; | ||
710 | |||
711 | |||
712 | /* r_netid */ | ||
713 | p = xdr_inline_decode(xdr, 4); | ||
714 | if (unlikely(!p)) | ||
715 | goto out_err; | ||
716 | nlen = be32_to_cpup(p++); | ||
717 | |||
718 | p = xdr_inline_decode(xdr, nlen); | ||
719 | if (unlikely(!p)) | ||
720 | goto out_err; | ||
721 | |||
722 | netid = kmalloc(nlen+1, gfp_flags); | ||
723 | if (unlikely(!netid)) | ||
724 | goto out_err; | ||
725 | |||
726 | netid[nlen] = '\0'; | ||
727 | memcpy(netid, p, nlen); | ||
728 | |||
729 | /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ | ||
730 | p = xdr_inline_decode(xdr, 4); | ||
731 | if (unlikely(!p)) | ||
732 | goto out_free_netid; | ||
733 | rlen = be32_to_cpup(p); | ||
734 | |||
735 | p = xdr_inline_decode(xdr, rlen); | ||
736 | if (unlikely(!p)) | ||
737 | goto out_free_netid; | ||
738 | |||
739 | /* port is ".ABC.DEF", 8 chars max */ | ||
740 | if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { | ||
741 | dprintk("%s: Invalid address, length %d\n", __func__, | ||
742 | rlen); | ||
743 | goto out_free_netid; | ||
744 | } | ||
745 | buf = kmalloc(rlen + 1, gfp_flags); | ||
746 | if (!buf) { | ||
747 | dprintk("%s: Not enough memory\n", __func__); | ||
748 | goto out_free_netid; | ||
749 | } | ||
750 | buf[rlen] = '\0'; | ||
751 | memcpy(buf, p, rlen); | ||
752 | |||
753 | /* replace port '.' with '-' */ | ||
754 | portstr = strrchr(buf, '.'); | ||
755 | if (!portstr) { | ||
756 | dprintk("%s: Failed finding expected dot in port\n", | ||
757 | __func__); | ||
758 | goto out_free_buf; | ||
759 | } | ||
760 | *portstr = '-'; | ||
761 | |||
762 | /* find '.' between address and port */ | ||
763 | portstr = strrchr(buf, '.'); | ||
764 | if (!portstr) { | ||
765 | dprintk("%s: Failed finding expected dot between address and " | ||
766 | "port\n", __func__); | ||
767 | goto out_free_buf; | ||
768 | } | ||
769 | *portstr = '\0'; | ||
770 | |||
771 | da = kzalloc(sizeof(*da), gfp_flags); | ||
772 | if (unlikely(!da)) | ||
773 | goto out_free_buf; | ||
774 | |||
775 | INIT_LIST_HEAD(&da->da_node); | ||
776 | |||
777 | if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, | ||
778 | sizeof(da->da_addr))) { | ||
779 | dprintk("%s: error parsing address %s\n", __func__, buf); | ||
780 | goto out_free_da; | ||
781 | } | ||
782 | |||
783 | portstr++; | ||
784 | sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); | ||
785 | port = htons((tmp[0] << 8) | (tmp[1])); | ||
786 | |||
787 | switch (da->da_addr.ss_family) { | ||
788 | case AF_INET: | ||
789 | ((struct sockaddr_in *)&da->da_addr)->sin_port = port; | ||
790 | da->da_addrlen = sizeof(struct sockaddr_in); | ||
791 | match_netid = "tcp"; | ||
792 | match_netid_len = 3; | ||
793 | break; | ||
794 | |||
795 | case AF_INET6: | ||
796 | ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; | ||
797 | da->da_addrlen = sizeof(struct sockaddr_in6); | ||
798 | match_netid = "tcp6"; | ||
799 | match_netid_len = 4; | ||
800 | startsep = "["; | ||
801 | endsep = "]"; | ||
802 | break; | ||
803 | |||
804 | default: | ||
805 | dprintk("%s: unsupported address family: %u\n", | ||
806 | __func__, da->da_addr.ss_family); | ||
807 | goto out_free_da; | ||
808 | } | ||
809 | |||
810 | if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { | ||
811 | dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", | ||
812 | __func__, netid, match_netid); | ||
813 | goto out_free_da; | ||
814 | } | ||
815 | |||
816 | /* save human readable address */ | ||
817 | len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; | ||
818 | da->da_remotestr = kzalloc(len, gfp_flags); | ||
819 | |||
820 | /* NULL is ok, only used for dprintk */ | ||
821 | if (da->da_remotestr) | ||
822 | snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, | ||
823 | buf, endsep, ntohs(port)); | ||
824 | |||
825 | dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); | ||
826 | kfree(buf); | ||
827 | kfree(netid); | ||
828 | return da; | ||
829 | |||
830 | out_free_da: | ||
831 | kfree(da); | ||
832 | out_free_buf: | ||
833 | dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); | ||
834 | kfree(buf); | ||
835 | out_free_netid: | ||
836 | kfree(netid); | ||
837 | out_err: | ||
838 | return NULL; | ||
839 | } | ||
840 | EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr); | ||
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c91a4799c562..568ecf0a880f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -70,8 +70,15 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_read); | |||
70 | 70 | ||
71 | void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) | 71 | void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) |
72 | { | 72 | { |
73 | struct nfs_pgio_mirror *mirror; | ||
74 | |||
73 | pgio->pg_ops = &nfs_pgio_rw_ops; | 75 | pgio->pg_ops = &nfs_pgio_rw_ops; |
74 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; | 76 | |
77 | /* read path should never have more than one mirror */ | ||
78 | WARN_ON_ONCE(pgio->pg_mirror_count != 1); | ||
79 | |||
80 | mirror = &pgio->pg_mirrors[0]; | ||
81 | mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; | ||
75 | } | 82 | } |
76 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); | 83 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); |
77 | 84 | ||
@@ -81,6 +88,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
81 | struct nfs_page *new; | 88 | struct nfs_page *new; |
82 | unsigned int len; | 89 | unsigned int len; |
83 | struct nfs_pageio_descriptor pgio; | 90 | struct nfs_pageio_descriptor pgio; |
91 | struct nfs_pgio_mirror *pgm; | ||
84 | 92 | ||
85 | len = nfs_page_length(page); | 93 | len = nfs_page_length(page); |
86 | if (len == 0) | 94 | if (len == 0) |
@@ -97,7 +105,13 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
97 | &nfs_async_read_completion_ops); | 105 | &nfs_async_read_completion_ops); |
98 | nfs_pageio_add_request(&pgio, new); | 106 | nfs_pageio_add_request(&pgio, new); |
99 | nfs_pageio_complete(&pgio); | 107 | nfs_pageio_complete(&pgio); |
100 | NFS_I(inode)->read_io += pgio.pg_bytes_written; | 108 | |
109 | /* It doesn't make sense to do mirrored reads! */ | ||
110 | WARN_ON_ONCE(pgio.pg_mirror_count != 1); | ||
111 | |||
112 | pgm = &pgio.pg_mirrors[0]; | ||
113 | NFS_I(inode)->read_io += pgm->pg_bytes_written; | ||
114 | |||
101 | return 0; | 115 | return 0; |
102 | } | 116 | } |
103 | 117 | ||
@@ -168,13 +182,14 @@ out: | |||
168 | 182 | ||
169 | static void nfs_initiate_read(struct nfs_pgio_header *hdr, | 183 | static void nfs_initiate_read(struct nfs_pgio_header *hdr, |
170 | struct rpc_message *msg, | 184 | struct rpc_message *msg, |
185 | const struct nfs_rpc_ops *rpc_ops, | ||
171 | struct rpc_task_setup *task_setup_data, int how) | 186 | struct rpc_task_setup *task_setup_data, int how) |
172 | { | 187 | { |
173 | struct inode *inode = hdr->inode; | 188 | struct inode *inode = hdr->inode; |
174 | int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; | 189 | int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; |
175 | 190 | ||
176 | task_setup_data->flags |= swap_flags; | 191 | task_setup_data->flags |= swap_flags; |
177 | NFS_PROTO(inode)->read_setup(hdr, msg); | 192 | rpc_ops->read_setup(hdr, msg); |
178 | } | 193 | } |
179 | 194 | ||
180 | static void | 195 | static void |
@@ -351,6 +366,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
351 | struct list_head *pages, unsigned nr_pages) | 366 | struct list_head *pages, unsigned nr_pages) |
352 | { | 367 | { |
353 | struct nfs_pageio_descriptor pgio; | 368 | struct nfs_pageio_descriptor pgio; |
369 | struct nfs_pgio_mirror *pgm; | ||
354 | struct nfs_readdesc desc = { | 370 | struct nfs_readdesc desc = { |
355 | .pgio = &pgio, | 371 | .pgio = &pgio, |
356 | }; | 372 | }; |
@@ -386,10 +402,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
386 | &nfs_async_read_completion_ops); | 402 | &nfs_async_read_completion_ops); |
387 | 403 | ||
388 | ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); | 404 | ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); |
389 | |||
390 | nfs_pageio_complete(&pgio); | 405 | nfs_pageio_complete(&pgio); |
391 | NFS_I(inode)->read_io += pgio.pg_bytes_written; | 406 | |
392 | npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 407 | /* It doesn't make sense to do mirrored reads! */ |
408 | WARN_ON_ONCE(pgio.pg_mirror_count != 1); | ||
409 | |||
410 | pgm = &pgio.pg_mirrors[0]; | ||
411 | NFS_I(inode)->read_io += pgm->pg_bytes_written; | ||
412 | npages = (pgm->pg_bytes_written + PAGE_CACHE_SIZE - 1) >> | ||
413 | PAGE_CACHE_SHIFT; | ||
393 | nfs_add_stats(inode, NFSIOS_READPAGES, npages); | 414 | nfs_add_stats(inode, NFSIOS_READPAGES, npages); |
394 | read_complete: | 415 | read_complete: |
395 | put_nfs_open_context(desc.ctx); | 416 | put_nfs_open_context(desc.ctx); |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 31a11b0e885d..322b2de02988 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -311,7 +311,6 @@ const struct super_operations nfs_sops = { | |||
311 | .destroy_inode = nfs_destroy_inode, | 311 | .destroy_inode = nfs_destroy_inode, |
312 | .write_inode = nfs_write_inode, | 312 | .write_inode = nfs_write_inode, |
313 | .drop_inode = nfs_drop_inode, | 313 | .drop_inode = nfs_drop_inode, |
314 | .put_super = nfs_put_super, | ||
315 | .statfs = nfs_statfs, | 314 | .statfs = nfs_statfs, |
316 | .evict_inode = nfs_evict_inode, | 315 | .evict_inode = nfs_evict_inode, |
317 | .umount_begin = nfs_umount_begin, | 316 | .umount_begin = nfs_umount_begin, |
@@ -405,12 +404,15 @@ void __exit unregister_nfs_fs(void) | |||
405 | unregister_filesystem(&nfs_fs_type); | 404 | unregister_filesystem(&nfs_fs_type); |
406 | } | 405 | } |
407 | 406 | ||
408 | void nfs_sb_active(struct super_block *sb) | 407 | bool nfs_sb_active(struct super_block *sb) |
409 | { | 408 | { |
410 | struct nfs_server *server = NFS_SB(sb); | 409 | struct nfs_server *server = NFS_SB(sb); |
411 | 410 | ||
412 | if (atomic_inc_return(&server->active) == 1) | 411 | if (!atomic_inc_not_zero(&sb->s_active)) |
413 | atomic_inc(&sb->s_active); | 412 | return false; |
413 | if (atomic_inc_return(&server->active) != 1) | ||
414 | atomic_dec(&sb->s_active); | ||
415 | return true; | ||
414 | } | 416 | } |
415 | EXPORT_SYMBOL_GPL(nfs_sb_active); | 417 | EXPORT_SYMBOL_GPL(nfs_sb_active); |
416 | 418 | ||
@@ -2569,7 +2571,7 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, | |||
2569 | error = nfs_bdi_register(server); | 2571 | error = nfs_bdi_register(server); |
2570 | if (error) { | 2572 | if (error) { |
2571 | mntroot = ERR_PTR(error); | 2573 | mntroot = ERR_PTR(error); |
2572 | goto error_splat_bdi; | 2574 | goto error_splat_super; |
2573 | } | 2575 | } |
2574 | server->super = s; | 2576 | server->super = s; |
2575 | } | 2577 | } |
@@ -2601,9 +2603,6 @@ error_splat_root: | |||
2601 | dput(mntroot); | 2603 | dput(mntroot); |
2602 | mntroot = ERR_PTR(error); | 2604 | mntroot = ERR_PTR(error); |
2603 | error_splat_super: | 2605 | error_splat_super: |
2604 | if (server && !s->s_root) | ||
2605 | bdi_unregister(&server->backing_dev_info); | ||
2606 | error_splat_bdi: | ||
2607 | deactivate_locked_super(s); | 2606 | deactivate_locked_super(s); |
2608 | goto out; | 2607 | goto out; |
2609 | } | 2608 | } |
@@ -2651,27 +2650,19 @@ out: | |||
2651 | EXPORT_SYMBOL_GPL(nfs_fs_mount); | 2650 | EXPORT_SYMBOL_GPL(nfs_fs_mount); |
2652 | 2651 | ||
2653 | /* | 2652 | /* |
2654 | * Ensure that we unregister the bdi before kill_anon_super | ||
2655 | * releases the device name | ||
2656 | */ | ||
2657 | void nfs_put_super(struct super_block *s) | ||
2658 | { | ||
2659 | struct nfs_server *server = NFS_SB(s); | ||
2660 | |||
2661 | bdi_unregister(&server->backing_dev_info); | ||
2662 | } | ||
2663 | EXPORT_SYMBOL_GPL(nfs_put_super); | ||
2664 | |||
2665 | /* | ||
2666 | * Destroy an NFS2/3 superblock | 2653 | * Destroy an NFS2/3 superblock |
2667 | */ | 2654 | */ |
2668 | void nfs_kill_super(struct super_block *s) | 2655 | void nfs_kill_super(struct super_block *s) |
2669 | { | 2656 | { |
2670 | struct nfs_server *server = NFS_SB(s); | 2657 | struct nfs_server *server = NFS_SB(s); |
2658 | dev_t dev = s->s_dev; | ||
2659 | |||
2660 | generic_shutdown_super(s); | ||
2671 | 2661 | ||
2672 | kill_anon_super(s); | ||
2673 | nfs_fscache_release_super_cookie(s); | 2662 | nfs_fscache_release_super_cookie(s); |
2663 | |||
2674 | nfs_free_server(server); | 2664 | nfs_free_server(server); |
2665 | free_anon_bdev(dev); | ||
2675 | } | 2666 | } |
2676 | EXPORT_SYMBOL_GPL(nfs_kill_super); | 2667 | EXPORT_SYMBOL_GPL(nfs_kill_super); |
2677 | 2668 | ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index af3af685a9e3..88a6d2196ece 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -473,13 +473,18 @@ try_again: | |||
473 | do { | 473 | do { |
474 | /* | 474 | /* |
475 | * Subrequests are always contiguous, non overlapping | 475 | * Subrequests are always contiguous, non overlapping |
476 | * and in order. If not, it's a programming error. | 476 | * and in order - but may be repeated (mirrored writes). |
477 | */ | 477 | */ |
478 | WARN_ON_ONCE(subreq->wb_offset != | 478 | if (subreq->wb_offset == (head->wb_offset + total_bytes)) { |
479 | (head->wb_offset + total_bytes)); | 479 | /* keep track of how many bytes this group covers */ |
480 | 480 | total_bytes += subreq->wb_bytes; | |
481 | /* keep track of how many bytes this group covers */ | 481 | } else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset || |
482 | total_bytes += subreq->wb_bytes; | 482 | ((subreq->wb_offset + subreq->wb_bytes) > |
483 | (head->wb_offset + total_bytes)))) { | ||
484 | nfs_page_group_unlock(head); | ||
485 | spin_unlock(&inode->i_lock); | ||
486 | return ERR_PTR(-EIO); | ||
487 | } | ||
483 | 488 | ||
484 | if (!nfs_lock_request(subreq)) { | 489 | if (!nfs_lock_request(subreq)) { |
485 | /* releases page group bit lock and | 490 | /* releases page group bit lock and |
@@ -786,7 +791,7 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, | |||
786 | spin_unlock(cinfo->lock); | 791 | spin_unlock(cinfo->lock); |
787 | if (!cinfo->dreq) { | 792 | if (!cinfo->dreq) { |
788 | inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | 793 | inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); |
789 | inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, | 794 | inc_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host), |
790 | BDI_RECLAIMABLE); | 795 | BDI_RECLAIMABLE); |
791 | __mark_inode_dirty(req->wb_context->dentry->d_inode, | 796 | __mark_inode_dirty(req->wb_context->dentry->d_inode, |
792 | I_DIRTY_DATASYNC); | 797 | I_DIRTY_DATASYNC); |
@@ -842,9 +847,9 @@ EXPORT_SYMBOL_GPL(nfs_init_cinfo); | |||
842 | */ | 847 | */ |
843 | void | 848 | void |
844 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, | 849 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, |
845 | struct nfs_commit_info *cinfo) | 850 | struct nfs_commit_info *cinfo, u32 ds_commit_idx) |
846 | { | 851 | { |
847 | if (pnfs_mark_request_commit(req, lseg, cinfo)) | 852 | if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx)) |
848 | return; | 853 | return; |
849 | nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo); | 854 | nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo); |
850 | } | 855 | } |
@@ -853,7 +858,7 @@ static void | |||
853 | nfs_clear_page_commit(struct page *page) | 858 | nfs_clear_page_commit(struct page *page) |
854 | { | 859 | { |
855 | dec_zone_page_state(page, NR_UNSTABLE_NFS); | 860 | dec_zone_page_state(page, NR_UNSTABLE_NFS); |
856 | dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE); | 861 | dec_bdi_stat(inode_to_bdi(page_file_mapping(page)->host), BDI_RECLAIMABLE); |
857 | } | 862 | } |
858 | 863 | ||
859 | /* Called holding inode (/cinfo) lock */ | 864 | /* Called holding inode (/cinfo) lock */ |
@@ -900,7 +905,8 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) | |||
900 | } | 905 | } |
901 | if (nfs_write_need_commit(hdr)) { | 906 | if (nfs_write_need_commit(hdr)) { |
902 | memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); | 907 | memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); |
903 | nfs_mark_request_commit(req, hdr->lseg, &cinfo); | 908 | nfs_mark_request_commit(req, hdr->lseg, &cinfo, |
909 | hdr->pgio_mirror_idx); | ||
904 | goto next; | 910 | goto next; |
905 | } | 911 | } |
906 | remove_req: | 912 | remove_req: |
@@ -1091,6 +1097,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) | |||
1091 | { | 1097 | { |
1092 | struct nfs_open_context *ctx = nfs_file_open_context(file); | 1098 | struct nfs_open_context *ctx = nfs_file_open_context(file); |
1093 | struct nfs_lock_context *l_ctx; | 1099 | struct nfs_lock_context *l_ctx; |
1100 | struct file_lock_context *flctx = file_inode(file)->i_flctx; | ||
1094 | struct nfs_page *req; | 1101 | struct nfs_page *req; |
1095 | int do_flush, status; | 1102 | int do_flush, status; |
1096 | /* | 1103 | /* |
@@ -1109,7 +1116,9 @@ int nfs_flush_incompatible(struct file *file, struct page *page) | |||
1109 | do_flush = req->wb_page != page || req->wb_context != ctx; | 1116 | do_flush = req->wb_page != page || req->wb_context != ctx; |
1110 | /* for now, flush if more than 1 request in page_group */ | 1117 | /* for now, flush if more than 1 request in page_group */ |
1111 | do_flush |= req->wb_this_page != req; | 1118 | do_flush |= req->wb_this_page != req; |
1112 | if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { | 1119 | if (l_ctx && flctx && |
1120 | !(list_empty_careful(&flctx->flc_posix) && | ||
1121 | list_empty_careful(&flctx->flc_flock))) { | ||
1113 | do_flush |= l_ctx->lockowner.l_owner != current->files | 1122 | do_flush |= l_ctx->lockowner.l_owner != current->files |
1114 | || l_ctx->lockowner.l_pid != current->tgid; | 1123 | || l_ctx->lockowner.l_pid != current->tgid; |
1115 | } | 1124 | } |
@@ -1170,6 +1179,13 @@ out: | |||
1170 | return PageUptodate(page) != 0; | 1179 | return PageUptodate(page) != 0; |
1171 | } | 1180 | } |
1172 | 1181 | ||
1182 | static bool | ||
1183 | is_whole_file_wrlock(struct file_lock *fl) | ||
1184 | { | ||
1185 | return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX && | ||
1186 | fl->fl_type == F_WRLCK; | ||
1187 | } | ||
1188 | |||
1173 | /* If we know the page is up to date, and we're not using byte range locks (or | 1189 | /* If we know the page is up to date, and we're not using byte range locks (or |
1174 | * if we have the whole file locked for writing), it may be more efficient to | 1190 | * if we have the whole file locked for writing), it may be more efficient to |
1175 | * extend the write to cover the entire page in order to avoid fragmentation | 1191 | * extend the write to cover the entire page in order to avoid fragmentation |
@@ -1180,17 +1196,36 @@ out: | |||
1180 | */ | 1196 | */ |
1181 | static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode) | 1197 | static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode) |
1182 | { | 1198 | { |
1199 | int ret; | ||
1200 | struct file_lock_context *flctx = inode->i_flctx; | ||
1201 | struct file_lock *fl; | ||
1202 | |||
1183 | if (file->f_flags & O_DSYNC) | 1203 | if (file->f_flags & O_DSYNC) |
1184 | return 0; | 1204 | return 0; |
1185 | if (!nfs_write_pageuptodate(page, inode)) | 1205 | if (!nfs_write_pageuptodate(page, inode)) |
1186 | return 0; | 1206 | return 0; |
1187 | if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) | 1207 | if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) |
1188 | return 1; | 1208 | return 1; |
1189 | if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 && | 1209 | if (!flctx || (list_empty_careful(&flctx->flc_flock) && |
1190 | inode->i_flock->fl_end == OFFSET_MAX && | 1210 | list_empty_careful(&flctx->flc_posix))) |
1191 | inode->i_flock->fl_type != F_RDLCK)) | 1211 | return 0; |
1192 | return 1; | 1212 | |
1193 | return 0; | 1213 | /* Check to see if there are whole file write locks */ |
1214 | ret = 0; | ||
1215 | spin_lock(&flctx->flc_lock); | ||
1216 | if (!list_empty(&flctx->flc_posix)) { | ||
1217 | fl = list_first_entry(&flctx->flc_posix, struct file_lock, | ||
1218 | fl_list); | ||
1219 | if (is_whole_file_wrlock(fl)) | ||
1220 | ret = 1; | ||
1221 | } else if (!list_empty(&flctx->flc_flock)) { | ||
1222 | fl = list_first_entry(&flctx->flc_flock, struct file_lock, | ||
1223 | fl_list); | ||
1224 | if (fl->fl_type == F_WRLCK) | ||
1225 | ret = 1; | ||
1226 | } | ||
1227 | spin_unlock(&flctx->flc_lock); | ||
1228 | return ret; | ||
1194 | } | 1229 | } |
1195 | 1230 | ||
1196 | /* | 1231 | /* |
@@ -1240,15 +1275,15 @@ static int flush_task_priority(int how) | |||
1240 | 1275 | ||
1241 | static void nfs_initiate_write(struct nfs_pgio_header *hdr, | 1276 | static void nfs_initiate_write(struct nfs_pgio_header *hdr, |
1242 | struct rpc_message *msg, | 1277 | struct rpc_message *msg, |
1278 | const struct nfs_rpc_ops *rpc_ops, | ||
1243 | struct rpc_task_setup *task_setup_data, int how) | 1279 | struct rpc_task_setup *task_setup_data, int how) |
1244 | { | 1280 | { |
1245 | struct inode *inode = hdr->inode; | ||
1246 | int priority = flush_task_priority(how); | 1281 | int priority = flush_task_priority(how); |
1247 | 1282 | ||
1248 | task_setup_data->priority = priority; | 1283 | task_setup_data->priority = priority; |
1249 | NFS_PROTO(inode)->write_setup(hdr, msg); | 1284 | rpc_ops->write_setup(hdr, msg); |
1250 | 1285 | ||
1251 | nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, | 1286 | nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client, |
1252 | &task_setup_data->rpc_client, msg, hdr); | 1287 | &task_setup_data->rpc_client, msg, hdr); |
1253 | } | 1288 | } |
1254 | 1289 | ||
@@ -1298,8 +1333,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_write); | |||
1298 | 1333 | ||
1299 | void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) | 1334 | void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) |
1300 | { | 1335 | { |
1336 | struct nfs_pgio_mirror *mirror; | ||
1337 | |||
1301 | pgio->pg_ops = &nfs_pgio_rw_ops; | 1338 | pgio->pg_ops = &nfs_pgio_rw_ops; |
1302 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; | 1339 | |
1340 | nfs_pageio_stop_mirroring(pgio); | ||
1341 | |||
1342 | mirror = &pgio->pg_mirrors[0]; | ||
1343 | mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; | ||
1303 | } | 1344 | } |
1304 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); | 1345 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); |
1305 | 1346 | ||
@@ -1465,6 +1506,7 @@ void nfs_commitdata_release(struct nfs_commit_data *data) | |||
1465 | EXPORT_SYMBOL_GPL(nfs_commitdata_release); | 1506 | EXPORT_SYMBOL_GPL(nfs_commitdata_release); |
1466 | 1507 | ||
1467 | int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, | 1508 | int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, |
1509 | const struct nfs_rpc_ops *nfs_ops, | ||
1468 | const struct rpc_call_ops *call_ops, | 1510 | const struct rpc_call_ops *call_ops, |
1469 | int how, int flags) | 1511 | int how, int flags) |
1470 | { | 1512 | { |
@@ -1486,7 +1528,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, | |||
1486 | .priority = priority, | 1528 | .priority = priority, |
1487 | }; | 1529 | }; |
1488 | /* Set up the initial task struct. */ | 1530 | /* Set up the initial task struct. */ |
1489 | NFS_PROTO(data->inode)->commit_setup(data, &msg); | 1531 | nfs_ops->commit_setup(data, &msg); |
1490 | 1532 | ||
1491 | dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); | 1533 | dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); |
1492 | 1534 | ||
@@ -1554,17 +1596,18 @@ EXPORT_SYMBOL_GPL(nfs_init_commit); | |||
1554 | 1596 | ||
1555 | void nfs_retry_commit(struct list_head *page_list, | 1597 | void nfs_retry_commit(struct list_head *page_list, |
1556 | struct pnfs_layout_segment *lseg, | 1598 | struct pnfs_layout_segment *lseg, |
1557 | struct nfs_commit_info *cinfo) | 1599 | struct nfs_commit_info *cinfo, |
1600 | u32 ds_commit_idx) | ||
1558 | { | 1601 | { |
1559 | struct nfs_page *req; | 1602 | struct nfs_page *req; |
1560 | 1603 | ||
1561 | while (!list_empty(page_list)) { | 1604 | while (!list_empty(page_list)) { |
1562 | req = nfs_list_entry(page_list->next); | 1605 | req = nfs_list_entry(page_list->next); |
1563 | nfs_list_remove_request(req); | 1606 | nfs_list_remove_request(req); |
1564 | nfs_mark_request_commit(req, lseg, cinfo); | 1607 | nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx); |
1565 | if (!cinfo->dreq) { | 1608 | if (!cinfo->dreq) { |
1566 | dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | 1609 | dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); |
1567 | dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, | 1610 | dec_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host), |
1568 | BDI_RECLAIMABLE); | 1611 | BDI_RECLAIMABLE); |
1569 | } | 1612 | } |
1570 | nfs_unlock_and_release_request(req); | 1613 | nfs_unlock_and_release_request(req); |
@@ -1589,10 +1632,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, | |||
1589 | /* Set up the argument struct */ | 1632 | /* Set up the argument struct */ |
1590 | nfs_init_commit(data, head, NULL, cinfo); | 1633 | nfs_init_commit(data, head, NULL, cinfo); |
1591 | atomic_inc(&cinfo->mds->rpcs_out); | 1634 | atomic_inc(&cinfo->mds->rpcs_out); |
1592 | return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, | 1635 | return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), |
1593 | how, 0); | 1636 | data->mds_ops, how, 0); |
1594 | out_bad: | 1637 | out_bad: |
1595 | nfs_retry_commit(head, NULL, cinfo); | 1638 | nfs_retry_commit(head, NULL, cinfo, 0); |
1596 | cinfo->completion_ops->error_cleanup(NFS_I(inode)); | 1639 | cinfo->completion_ops->error_cleanup(NFS_I(inode)); |
1597 | return -ENOMEM; | 1640 | return -ENOMEM; |
1598 | } | 1641 | } |
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 73395156bdb4..683bf718aead 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
@@ -82,6 +82,16 @@ config NFSD_V4 | |||
82 | 82 | ||
83 | If unsure, say N. | 83 | If unsure, say N. |
84 | 84 | ||
85 | config NFSD_PNFS | ||
86 | bool "NFSv4.1 server support for Parallel NFS (pNFS)" | ||
87 | depends on NFSD_V4 | ||
88 | help | ||
89 | This option enables support for the parallel NFS features of the | ||
90 | minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS | ||
91 | server. | ||
92 | |||
93 | If unsure, say N. | ||
94 | |||
85 | config NFSD_V4_SECURITY_LABEL | 95 | config NFSD_V4_SECURITY_LABEL |
86 | bool "Provide Security Label support for NFSv4 server" | 96 | bool "Provide Security Label support for NFSv4 server" |
87 | depends on NFSD_V4 && SECURITY | 97 | depends on NFSD_V4 && SECURITY |
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index af32ef06b4fe..9a6028e120c6 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile | |||
@@ -2,9 +2,14 @@ | |||
2 | # Makefile for the Linux nfs server | 2 | # Makefile for the Linux nfs server |
3 | # | 3 | # |
4 | 4 | ||
5 | ccflags-y += -I$(src) # needed for trace events | ||
6 | |||
5 | obj-$(CONFIG_NFSD) += nfsd.o | 7 | obj-$(CONFIG_NFSD) += nfsd.o |
6 | 8 | ||
7 | nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ | 9 | # this one should be compiled first, as the tracing macros can easily blow up |
10 | nfsd-y += trace.o | ||
11 | |||
12 | nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ | ||
8 | export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o | 13 | export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o |
9 | nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o | 14 | nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o |
10 | nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o | 15 | nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o |
@@ -12,3 +17,4 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o | |||
12 | nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o | 17 | nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o |
13 | nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ | 18 | nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ |
14 | nfs4acl.o nfs4callback.o nfs4recover.o | 19 | nfs4acl.o nfs4callback.o nfs4recover.o |
20 | nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o blocklayout.o blocklayoutxdr.o | ||
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c new file mode 100644 index 000000000000..cdbc78c72542 --- /dev/null +++ b/fs/nfsd/blocklayout.c | |||
@@ -0,0 +1,189 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014 Christoph Hellwig. | ||
3 | */ | ||
4 | #include <linux/exportfs.h> | ||
5 | #include <linux/genhd.h> | ||
6 | #include <linux/slab.h> | ||
7 | |||
8 | #include <linux/nfsd/debug.h> | ||
9 | |||
10 | #include "blocklayoutxdr.h" | ||
11 | #include "pnfs.h" | ||
12 | |||
13 | #define NFSDDBG_FACILITY NFSDDBG_PNFS | ||
14 | |||
15 | |||
16 | static int | ||
17 | nfsd4_block_get_device_info_simple(struct super_block *sb, | ||
18 | struct nfsd4_getdeviceinfo *gdp) | ||
19 | { | ||
20 | struct pnfs_block_deviceaddr *dev; | ||
21 | struct pnfs_block_volume *b; | ||
22 | |||
23 | dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + | ||
24 | sizeof(struct pnfs_block_volume), GFP_KERNEL); | ||
25 | if (!dev) | ||
26 | return -ENOMEM; | ||
27 | gdp->gd_device = dev; | ||
28 | |||
29 | dev->nr_volumes = 1; | ||
30 | b = &dev->volumes[0]; | ||
31 | |||
32 | b->type = PNFS_BLOCK_VOLUME_SIMPLE; | ||
33 | b->simple.sig_len = PNFS_BLOCK_UUID_LEN; | ||
34 | return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len, | ||
35 | &b->simple.offset); | ||
36 | } | ||
37 | |||
38 | static __be32 | ||
39 | nfsd4_block_proc_getdeviceinfo(struct super_block *sb, | ||
40 | struct nfsd4_getdeviceinfo *gdp) | ||
41 | { | ||
42 | if (sb->s_bdev != sb->s_bdev->bd_contains) | ||
43 | return nfserr_inval; | ||
44 | return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp)); | ||
45 | } | ||
46 | |||
47 | static __be32 | ||
48 | nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, | ||
49 | struct nfsd4_layoutget *args) | ||
50 | { | ||
51 | struct nfsd4_layout_seg *seg = &args->lg_seg; | ||
52 | struct super_block *sb = inode->i_sb; | ||
53 | u32 block_size = (1 << inode->i_blkbits); | ||
54 | struct pnfs_block_extent *bex; | ||
55 | struct iomap iomap; | ||
56 | u32 device_generation = 0; | ||
57 | int error; | ||
58 | |||
59 | /* | ||
60 | * We do not attempt to support I/O smaller than the fs block size, | ||
61 | * or not aligned to it. | ||
62 | */ | ||
63 | if (args->lg_minlength < block_size) { | ||
64 | dprintk("pnfsd: I/O too small\n"); | ||
65 | goto out_layoutunavailable; | ||
66 | } | ||
67 | if (seg->offset & (block_size - 1)) { | ||
68 | dprintk("pnfsd: I/O misaligned\n"); | ||
69 | goto out_layoutunavailable; | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * Some clients barf on non-zero block numbers for NONE or INVALID | ||
74 | * layouts, so make sure to zero the whole structure. | ||
75 | */ | ||
76 | error = -ENOMEM; | ||
77 | bex = kzalloc(sizeof(*bex), GFP_KERNEL); | ||
78 | if (!bex) | ||
79 | goto out_error; | ||
80 | args->lg_content = bex; | ||
81 | |||
82 | error = sb->s_export_op->map_blocks(inode, seg->offset, seg->length, | ||
83 | &iomap, seg->iomode != IOMODE_READ, | ||
84 | &device_generation); | ||
85 | if (error) { | ||
86 | if (error == -ENXIO) | ||
87 | goto out_layoutunavailable; | ||
88 | goto out_error; | ||
89 | } | ||
90 | |||
91 | if (iomap.length < args->lg_minlength) { | ||
92 | dprintk("pnfsd: extent smaller than minlength\n"); | ||
93 | goto out_layoutunavailable; | ||
94 | } | ||
95 | |||
96 | switch (iomap.type) { | ||
97 | case IOMAP_MAPPED: | ||
98 | if (seg->iomode == IOMODE_READ) | ||
99 | bex->es = PNFS_BLOCK_READ_DATA; | ||
100 | else | ||
101 | bex->es = PNFS_BLOCK_READWRITE_DATA; | ||
102 | bex->soff = (iomap.blkno << 9); | ||
103 | break; | ||
104 | case IOMAP_UNWRITTEN: | ||
105 | if (seg->iomode & IOMODE_RW) { | ||
106 | /* | ||
107 | * Crack monkey special case from section 2.3.1. | ||
108 | */ | ||
109 | if (args->lg_minlength == 0) { | ||
110 | dprintk("pnfsd: no soup for you!\n"); | ||
111 | goto out_layoutunavailable; | ||
112 | } | ||
113 | |||
114 | bex->es = PNFS_BLOCK_INVALID_DATA; | ||
115 | bex->soff = (iomap.blkno << 9); | ||
116 | break; | ||
117 | } | ||
118 | /*FALLTHRU*/ | ||
119 | case IOMAP_HOLE: | ||
120 | if (seg->iomode == IOMODE_READ) { | ||
121 | bex->es = PNFS_BLOCK_NONE_DATA; | ||
122 | break; | ||
123 | } | ||
124 | /*FALLTHRU*/ | ||
125 | case IOMAP_DELALLOC: | ||
126 | default: | ||
127 | WARN(1, "pnfsd: filesystem returned %d extent\n", iomap.type); | ||
128 | goto out_layoutunavailable; | ||
129 | } | ||
130 | |||
131 | error = nfsd4_set_deviceid(&bex->vol_id, fhp, device_generation); | ||
132 | if (error) | ||
133 | goto out_error; | ||
134 | bex->foff = iomap.offset; | ||
135 | bex->len = iomap.length; | ||
136 | |||
137 | seg->offset = iomap.offset; | ||
138 | seg->length = iomap.length; | ||
139 | |||
140 | dprintk("GET: %lld:%lld %d\n", bex->foff, bex->len, bex->es); | ||
141 | return 0; | ||
142 | |||
143 | out_error: | ||
144 | seg->length = 0; | ||
145 | return nfserrno(error); | ||
146 | out_layoutunavailable: | ||
147 | seg->length = 0; | ||
148 | return nfserr_layoutunavailable; | ||
149 | } | ||
150 | |||
151 | static __be32 | ||
152 | nfsd4_block_proc_layoutcommit(struct inode *inode, | ||
153 | struct nfsd4_layoutcommit *lcp) | ||
154 | { | ||
155 | loff_t new_size = lcp->lc_last_wr + 1; | ||
156 | struct iattr iattr = { .ia_valid = 0 }; | ||
157 | struct iomap *iomaps; | ||
158 | int nr_iomaps; | ||
159 | int error; | ||
160 | |||
161 | nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, | ||
162 | lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); | ||
163 | if (nr_iomaps < 0) | ||
164 | return nfserrno(nr_iomaps); | ||
165 | |||
166 | if (lcp->lc_mtime.tv_nsec == UTIME_NOW || | ||
167 | timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0) | ||
168 | lcp->lc_mtime = current_fs_time(inode->i_sb); | ||
169 | iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME; | ||
170 | iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime; | ||
171 | |||
172 | if (new_size > i_size_read(inode)) { | ||
173 | iattr.ia_valid |= ATTR_SIZE; | ||
174 | iattr.ia_size = new_size; | ||
175 | } | ||
176 | |||
177 | error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps, | ||
178 | nr_iomaps, &iattr); | ||
179 | kfree(iomaps); | ||
180 | return nfserrno(error); | ||
181 | } | ||
182 | |||
183 | const struct nfsd4_layout_ops bl_layout_ops = { | ||
184 | .proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo, | ||
185 | .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo, | ||
186 | .proc_layoutget = nfsd4_block_proc_layoutget, | ||
187 | .encode_layoutget = nfsd4_block_encode_layoutget, | ||
188 | .proc_layoutcommit = nfsd4_block_proc_layoutcommit, | ||
189 | }; | ||
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c new file mode 100644 index 000000000000..9da89fddab33 --- /dev/null +++ b/fs/nfsd/blocklayoutxdr.c | |||
@@ -0,0 +1,157 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014 Christoph Hellwig. | ||
3 | */ | ||
4 | #include <linux/sunrpc/svc.h> | ||
5 | #include <linux/exportfs.h> | ||
6 | #include <linux/nfs4.h> | ||
7 | |||
8 | #include "nfsd.h" | ||
9 | #include "blocklayoutxdr.h" | ||
10 | |||
11 | #define NFSDDBG_FACILITY NFSDDBG_PNFS | ||
12 | |||
13 | |||
14 | __be32 | ||
15 | nfsd4_block_encode_layoutget(struct xdr_stream *xdr, | ||
16 | struct nfsd4_layoutget *lgp) | ||
17 | { | ||
18 | struct pnfs_block_extent *b = lgp->lg_content; | ||
19 | int len = sizeof(__be32) + 5 * sizeof(__be64) + sizeof(__be32); | ||
20 | __be32 *p; | ||
21 | |||
22 | p = xdr_reserve_space(xdr, sizeof(__be32) + len); | ||
23 | if (!p) | ||
24 | return nfserr_toosmall; | ||
25 | |||
26 | *p++ = cpu_to_be32(len); | ||
27 | *p++ = cpu_to_be32(1); /* we always return a single extent */ | ||
28 | |||
29 | p = xdr_encode_opaque_fixed(p, &b->vol_id, | ||
30 | sizeof(struct nfsd4_deviceid)); | ||
31 | p = xdr_encode_hyper(p, b->foff); | ||
32 | p = xdr_encode_hyper(p, b->len); | ||
33 | p = xdr_encode_hyper(p, b->soff); | ||
34 | *p++ = cpu_to_be32(b->es); | ||
35 | return 0; | ||
36 | } | ||
37 | |||
38 | static int | ||
39 | nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) | ||
40 | { | ||
41 | __be32 *p; | ||
42 | int len; | ||
43 | |||
44 | switch (b->type) { | ||
45 | case PNFS_BLOCK_VOLUME_SIMPLE: | ||
46 | len = 4 + 4 + 8 + 4 + b->simple.sig_len; | ||
47 | p = xdr_reserve_space(xdr, len); | ||
48 | if (!p) | ||
49 | return -ETOOSMALL; | ||
50 | |||
51 | *p++ = cpu_to_be32(b->type); | ||
52 | *p++ = cpu_to_be32(1); /* single signature */ | ||
53 | p = xdr_encode_hyper(p, b->simple.offset); | ||
54 | p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len); | ||
55 | break; | ||
56 | default: | ||
57 | return -ENOTSUPP; | ||
58 | } | ||
59 | |||
60 | return len; | ||
61 | } | ||
62 | |||
63 | __be32 | ||
64 | nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr, | ||
65 | struct nfsd4_getdeviceinfo *gdp) | ||
66 | { | ||
67 | struct pnfs_block_deviceaddr *dev = gdp->gd_device; | ||
68 | int len = sizeof(__be32), ret, i; | ||
69 | __be32 *p; | ||
70 | |||
71 | p = xdr_reserve_space(xdr, len + sizeof(__be32)); | ||
72 | if (!p) | ||
73 | return nfserr_resource; | ||
74 | |||
75 | for (i = 0; i < dev->nr_volumes; i++) { | ||
76 | ret = nfsd4_block_encode_volume(xdr, &dev->volumes[i]); | ||
77 | if (ret < 0) | ||
78 | return nfserrno(ret); | ||
79 | len += ret; | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * Fill in the overall length and number of volumes at the beginning | ||
84 | * of the layout. | ||
85 | */ | ||
86 | *p++ = cpu_to_be32(len); | ||
87 | *p++ = cpu_to_be32(dev->nr_volumes); | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | int | ||
92 | nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, | ||
93 | u32 block_size) | ||
94 | { | ||
95 | struct iomap *iomaps; | ||
96 | u32 nr_iomaps, expected, i; | ||
97 | |||
98 | if (len < sizeof(u32)) { | ||
99 | dprintk("%s: extent array too small: %u\n", __func__, len); | ||
100 | return -EINVAL; | ||
101 | } | ||
102 | |||
103 | nr_iomaps = be32_to_cpup(p++); | ||
104 | expected = sizeof(__be32) + nr_iomaps * NFS4_BLOCK_EXTENT_SIZE; | ||
105 | if (len != expected) { | ||
106 | dprintk("%s: extent array size mismatch: %u/%u\n", | ||
107 | __func__, len, expected); | ||
108 | return -EINVAL; | ||
109 | } | ||
110 | |||
111 | iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL); | ||
112 | if (!iomaps) { | ||
113 | dprintk("%s: failed to allocate extent array\n", __func__); | ||
114 | return -ENOMEM; | ||
115 | } | ||
116 | |||
117 | for (i = 0; i < nr_iomaps; i++) { | ||
118 | struct pnfs_block_extent bex; | ||
119 | |||
120 | memcpy(&bex.vol_id, p, sizeof(struct nfsd4_deviceid)); | ||
121 | p += XDR_QUADLEN(sizeof(struct nfsd4_deviceid)); | ||
122 | |||
123 | p = xdr_decode_hyper(p, &bex.foff); | ||
124 | if (bex.foff & (block_size - 1)) { | ||
125 | dprintk("%s: unaligned offset %lld\n", | ||
126 | __func__, bex.foff); | ||
127 | goto fail; | ||
128 | } | ||
129 | p = xdr_decode_hyper(p, &bex.len); | ||
130 | if (bex.len & (block_size - 1)) { | ||
131 | dprintk("%s: unaligned length %lld\n", | ||
132 | __func__, bex.foff); | ||
133 | goto fail; | ||
134 | } | ||
135 | p = xdr_decode_hyper(p, &bex.soff); | ||
136 | if (bex.soff & (block_size - 1)) { | ||
137 | dprintk("%s: unaligned disk offset %lld\n", | ||
138 | __func__, bex.soff); | ||
139 | goto fail; | ||
140 | } | ||
141 | bex.es = be32_to_cpup(p++); | ||
142 | if (bex.es != PNFS_BLOCK_READWRITE_DATA) { | ||
143 | dprintk("%s: incorrect extent state %d\n", | ||
144 | __func__, bex.es); | ||
145 | goto fail; | ||
146 | } | ||
147 | |||
148 | iomaps[i].offset = bex.foff; | ||
149 | iomaps[i].length = bex.len; | ||
150 | } | ||
151 | |||
152 | *iomapp = iomaps; | ||
153 | return nr_iomaps; | ||
154 | fail: | ||
155 | kfree(iomaps); | ||
156 | return -EINVAL; | ||
157 | } | ||
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h new file mode 100644 index 000000000000..fdc79037c0e7 --- /dev/null +++ b/fs/nfsd/blocklayoutxdr.h | |||
@@ -0,0 +1,62 @@ | |||
1 | #ifndef _NFSD_BLOCKLAYOUTXDR_H | ||
2 | #define _NFSD_BLOCKLAYOUTXDR_H 1 | ||
3 | |||
4 | #include <linux/blkdev.h> | ||
5 | #include "xdr4.h" | ||
6 | |||
7 | struct iomap; | ||
8 | struct xdr_stream; | ||
9 | |||
10 | enum pnfs_block_extent_state { | ||
11 | PNFS_BLOCK_READWRITE_DATA = 0, | ||
12 | PNFS_BLOCK_READ_DATA = 1, | ||
13 | PNFS_BLOCK_INVALID_DATA = 2, | ||
14 | PNFS_BLOCK_NONE_DATA = 3, | ||
15 | }; | ||
16 | |||
17 | struct pnfs_block_extent { | ||
18 | struct nfsd4_deviceid vol_id; | ||
19 | u64 foff; | ||
20 | u64 len; | ||
21 | u64 soff; | ||
22 | enum pnfs_block_extent_state es; | ||
23 | }; | ||
24 | #define NFS4_BLOCK_EXTENT_SIZE 44 | ||
25 | |||
26 | enum pnfs_block_volume_type { | ||
27 | PNFS_BLOCK_VOLUME_SIMPLE = 0, | ||
28 | PNFS_BLOCK_VOLUME_SLICE = 1, | ||
29 | PNFS_BLOCK_VOLUME_CONCAT = 2, | ||
30 | PNFS_BLOCK_VOLUME_STRIPE = 3, | ||
31 | }; | ||
32 | |||
33 | /* | ||
34 | * Random upper cap for the uuid length to avoid unbounded allocation. | ||
35 | * Not actually limited by the protocol. | ||
36 | */ | ||
37 | #define PNFS_BLOCK_UUID_LEN 128 | ||
38 | |||
39 | struct pnfs_block_volume { | ||
40 | enum pnfs_block_volume_type type; | ||
41 | union { | ||
42 | struct { | ||
43 | u64 offset; | ||
44 | u32 sig_len; | ||
45 | u8 sig[PNFS_BLOCK_UUID_LEN]; | ||
46 | } simple; | ||
47 | }; | ||
48 | }; | ||
49 | |||
50 | struct pnfs_block_deviceaddr { | ||
51 | u32 nr_volumes; | ||
52 | struct pnfs_block_volume volumes[]; | ||
53 | }; | ||
54 | |||
55 | __be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr, | ||
56 | struct nfsd4_getdeviceinfo *gdp); | ||
57 | __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr, | ||
58 | struct nfsd4_layoutget *lgp); | ||
59 | int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, | ||
60 | u32 block_size); | ||
61 | |||
62 | #endif /* _NFSD_BLOCKLAYOUTXDR_H */ | ||
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 30a739d896ff..c3e3b6e55ae2 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include "nfsd.h" | 20 | #include "nfsd.h" |
21 | #include "nfsfh.h" | 21 | #include "nfsfh.h" |
22 | #include "netns.h" | 22 | #include "netns.h" |
23 | #include "pnfs.h" | ||
23 | 24 | ||
24 | #define NFSDDBG_FACILITY NFSDDBG_EXPORT | 25 | #define NFSDDBG_FACILITY NFSDDBG_EXPORT |
25 | 26 | ||
@@ -545,6 +546,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
545 | 546 | ||
546 | exp.ex_client = dom; | 547 | exp.ex_client = dom; |
547 | exp.cd = cd; | 548 | exp.cd = cd; |
549 | exp.ex_devid_map = NULL; | ||
548 | 550 | ||
549 | /* expiry */ | 551 | /* expiry */ |
550 | err = -EINVAL; | 552 | err = -EINVAL; |
@@ -621,6 +623,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
621 | if (!gid_valid(exp.ex_anon_gid)) | 623 | if (!gid_valid(exp.ex_anon_gid)) |
622 | goto out4; | 624 | goto out4; |
623 | err = 0; | 625 | err = 0; |
626 | |||
627 | nfsd4_setup_layout_type(&exp); | ||
624 | } | 628 | } |
625 | 629 | ||
626 | expp = svc_export_lookup(&exp); | 630 | expp = svc_export_lookup(&exp); |
@@ -703,6 +707,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) | |||
703 | new->ex_fslocs.locations = NULL; | 707 | new->ex_fslocs.locations = NULL; |
704 | new->ex_fslocs.locations_count = 0; | 708 | new->ex_fslocs.locations_count = 0; |
705 | new->ex_fslocs.migrated = 0; | 709 | new->ex_fslocs.migrated = 0; |
710 | new->ex_layout_type = 0; | ||
706 | new->ex_uuid = NULL; | 711 | new->ex_uuid = NULL; |
707 | new->cd = item->cd; | 712 | new->cd = item->cd; |
708 | } | 713 | } |
@@ -717,6 +722,8 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) | |||
717 | new->ex_anon_uid = item->ex_anon_uid; | 722 | new->ex_anon_uid = item->ex_anon_uid; |
718 | new->ex_anon_gid = item->ex_anon_gid; | 723 | new->ex_anon_gid = item->ex_anon_gid; |
719 | new->ex_fsid = item->ex_fsid; | 724 | new->ex_fsid = item->ex_fsid; |
725 | new->ex_devid_map = item->ex_devid_map; | ||
726 | item->ex_devid_map = NULL; | ||
720 | new->ex_uuid = item->ex_uuid; | 727 | new->ex_uuid = item->ex_uuid; |
721 | item->ex_uuid = NULL; | 728 | item->ex_uuid = NULL; |
722 | new->ex_fslocs.locations = item->ex_fslocs.locations; | 729 | new->ex_fslocs.locations = item->ex_fslocs.locations; |
@@ -725,6 +732,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) | |||
725 | item->ex_fslocs.locations_count = 0; | 732 | item->ex_fslocs.locations_count = 0; |
726 | new->ex_fslocs.migrated = item->ex_fslocs.migrated; | 733 | new->ex_fslocs.migrated = item->ex_fslocs.migrated; |
727 | item->ex_fslocs.migrated = 0; | 734 | item->ex_fslocs.migrated = 0; |
735 | new->ex_layout_type = item->ex_layout_type; | ||
728 | new->ex_nflavors = item->ex_nflavors; | 736 | new->ex_nflavors = item->ex_nflavors; |
729 | for (i = 0; i < MAX_SECINFO_LIST; i++) { | 737 | for (i = 0; i < MAX_SECINFO_LIST; i++) { |
730 | new->ex_flavors[i] = item->ex_flavors[i]; | 738 | new->ex_flavors[i] = item->ex_flavors[i]; |
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index 04dc8c167b0c..1f52bfcc436f 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h | |||
@@ -56,6 +56,8 @@ struct svc_export { | |||
56 | struct nfsd4_fs_locations ex_fslocs; | 56 | struct nfsd4_fs_locations ex_fslocs; |
57 | uint32_t ex_nflavors; | 57 | uint32_t ex_nflavors; |
58 | struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; | 58 | struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; |
59 | enum pnfs_layouttype ex_layout_type; | ||
60 | struct nfsd4_deviceid_map *ex_devid_map; | ||
59 | struct cache_detail *cd; | 61 | struct cache_detail *cd; |
60 | }; | 62 | }; |
61 | 63 | ||
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7cbdf1b2e4ab..58277859a467 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -546,6 +546,102 @@ out: | |||
546 | return status; | 546 | return status; |
547 | } | 547 | } |
548 | 548 | ||
549 | #ifdef CONFIG_NFSD_PNFS | ||
550 | /* | ||
551 | * CB_LAYOUTRECALL4args | ||
552 | * | ||
553 | * struct layoutrecall_file4 { | ||
554 | * nfs_fh4 lor_fh; | ||
555 | * offset4 lor_offset; | ||
556 | * length4 lor_length; | ||
557 | * stateid4 lor_stateid; | ||
558 | * }; | ||
559 | * | ||
560 | * union layoutrecall4 switch(layoutrecall_type4 lor_recalltype) { | ||
561 | * case LAYOUTRECALL4_FILE: | ||
562 | * layoutrecall_file4 lor_layout; | ||
563 | * case LAYOUTRECALL4_FSID: | ||
564 | * fsid4 lor_fsid; | ||
565 | * case LAYOUTRECALL4_ALL: | ||
566 | * void; | ||
567 | * }; | ||
568 | * | ||
569 | * struct CB_LAYOUTRECALL4args { | ||
570 | * layouttype4 clora_type; | ||
571 | * layoutiomode4 clora_iomode; | ||
572 | * bool clora_changed; | ||
573 | * layoutrecall4 clora_recall; | ||
574 | * }; | ||
575 | */ | ||
576 | static void encode_cb_layout4args(struct xdr_stream *xdr, | ||
577 | const struct nfs4_layout_stateid *ls, | ||
578 | struct nfs4_cb_compound_hdr *hdr) | ||
579 | { | ||
580 | __be32 *p; | ||
581 | |||
582 | BUG_ON(hdr->minorversion == 0); | ||
583 | |||
584 | p = xdr_reserve_space(xdr, 5 * 4); | ||
585 | *p++ = cpu_to_be32(OP_CB_LAYOUTRECALL); | ||
586 | *p++ = cpu_to_be32(ls->ls_layout_type); | ||
587 | *p++ = cpu_to_be32(IOMODE_ANY); | ||
588 | *p++ = cpu_to_be32(1); | ||
589 | *p = cpu_to_be32(RETURN_FILE); | ||
590 | |||
591 | encode_nfs_fh4(xdr, &ls->ls_stid.sc_file->fi_fhandle); | ||
592 | |||
593 | p = xdr_reserve_space(xdr, 2 * 8); | ||
594 | p = xdr_encode_hyper(p, 0); | ||
595 | xdr_encode_hyper(p, NFS4_MAX_UINT64); | ||
596 | |||
597 | encode_stateid4(xdr, &ls->ls_recall_sid); | ||
598 | |||
599 | hdr->nops++; | ||
600 | } | ||
601 | |||
602 | static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req, | ||
603 | struct xdr_stream *xdr, | ||
604 | const struct nfsd4_callback *cb) | ||
605 | { | ||
606 | const struct nfs4_layout_stateid *ls = | ||
607 | container_of(cb, struct nfs4_layout_stateid, ls_recall); | ||
608 | struct nfs4_cb_compound_hdr hdr = { | ||
609 | .ident = 0, | ||
610 | .minorversion = cb->cb_minorversion, | ||
611 | }; | ||
612 | |||
613 | encode_cb_compound4args(xdr, &hdr); | ||
614 | encode_cb_sequence4args(xdr, cb, &hdr); | ||
615 | encode_cb_layout4args(xdr, ls, &hdr); | ||
616 | encode_cb_nops(&hdr); | ||
617 | } | ||
618 | |||
619 | static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp, | ||
620 | struct xdr_stream *xdr, | ||
621 | struct nfsd4_callback *cb) | ||
622 | { | ||
623 | struct nfs4_cb_compound_hdr hdr; | ||
624 | enum nfsstat4 nfserr; | ||
625 | int status; | ||
626 | |||
627 | status = decode_cb_compound4res(xdr, &hdr); | ||
628 | if (unlikely(status)) | ||
629 | goto out; | ||
630 | if (cb) { | ||
631 | status = decode_cb_sequence4res(xdr, cb); | ||
632 | if (unlikely(status)) | ||
633 | goto out; | ||
634 | } | ||
635 | status = decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &nfserr); | ||
636 | if (unlikely(status)) | ||
637 | goto out; | ||
638 | if (unlikely(nfserr != NFS4_OK)) | ||
639 | status = nfs_cb_stat_to_errno(nfserr); | ||
640 | out: | ||
641 | return status; | ||
642 | } | ||
643 | #endif /* CONFIG_NFSD_PNFS */ | ||
644 | |||
549 | /* | 645 | /* |
550 | * RPC procedure tables | 646 | * RPC procedure tables |
551 | */ | 647 | */ |
@@ -563,6 +659,9 @@ out: | |||
563 | static struct rpc_procinfo nfs4_cb_procedures[] = { | 659 | static struct rpc_procinfo nfs4_cb_procedures[] = { |
564 | PROC(CB_NULL, NULL, cb_null, cb_null), | 660 | PROC(CB_NULL, NULL, cb_null, cb_null), |
565 | PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall), | 661 | PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall), |
662 | #ifdef CONFIG_NFSD_PNFS | ||
663 | PROC(CB_LAYOUT, COMPOUND, cb_layout, cb_layout), | ||
664 | #endif | ||
566 | }; | 665 | }; |
567 | 666 | ||
568 | static struct rpc_version nfs_cb_version4 = { | 667 | static struct rpc_version nfs_cb_version4 = { |
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c new file mode 100644 index 000000000000..3c1bfa155571 --- /dev/null +++ b/fs/nfsd/nfs4layouts.c | |||
@@ -0,0 +1,721 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014 Christoph Hellwig. | ||
3 | */ | ||
4 | #include <linux/kmod.h> | ||
5 | #include <linux/file.h> | ||
6 | #include <linux/jhash.h> | ||
7 | #include <linux/sched.h> | ||
8 | #include <linux/sunrpc/addr.h> | ||
9 | |||
10 | #include "pnfs.h" | ||
11 | #include "netns.h" | ||
12 | #include "trace.h" | ||
13 | |||
14 | #define NFSDDBG_FACILITY NFSDDBG_PNFS | ||
15 | |||
16 | struct nfs4_layout { | ||
17 | struct list_head lo_perstate; | ||
18 | struct nfs4_layout_stateid *lo_state; | ||
19 | struct nfsd4_layout_seg lo_seg; | ||
20 | }; | ||
21 | |||
22 | static struct kmem_cache *nfs4_layout_cache; | ||
23 | static struct kmem_cache *nfs4_layout_stateid_cache; | ||
24 | |||
25 | static struct nfsd4_callback_ops nfsd4_cb_layout_ops; | ||
26 | static const struct lock_manager_operations nfsd4_layouts_lm_ops; | ||
27 | |||
28 | const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = { | ||
29 | [LAYOUT_BLOCK_VOLUME] = &bl_layout_ops, | ||
30 | }; | ||
31 | |||
32 | /* pNFS device ID to export fsid mapping */ | ||
33 | #define DEVID_HASH_BITS 8 | ||
34 | #define DEVID_HASH_SIZE (1 << DEVID_HASH_BITS) | ||
35 | #define DEVID_HASH_MASK (DEVID_HASH_SIZE - 1) | ||
36 | static u64 nfsd_devid_seq = 1; | ||
37 | static struct list_head nfsd_devid_hash[DEVID_HASH_SIZE]; | ||
38 | static DEFINE_SPINLOCK(nfsd_devid_lock); | ||
39 | |||
40 | static inline u32 devid_hashfn(u64 idx) | ||
41 | { | ||
42 | return jhash_2words(idx, idx >> 32, 0) & DEVID_HASH_MASK; | ||
43 | } | ||
44 | |||
45 | static void | ||
46 | nfsd4_alloc_devid_map(const struct svc_fh *fhp) | ||
47 | { | ||
48 | const struct knfsd_fh *fh = &fhp->fh_handle; | ||
49 | size_t fsid_len = key_len(fh->fh_fsid_type); | ||
50 | struct nfsd4_deviceid_map *map, *old; | ||
51 | int i; | ||
52 | |||
53 | map = kzalloc(sizeof(*map) + fsid_len, GFP_KERNEL); | ||
54 | if (!map) | ||
55 | return; | ||
56 | |||
57 | map->fsid_type = fh->fh_fsid_type; | ||
58 | memcpy(&map->fsid, fh->fh_fsid, fsid_len); | ||
59 | |||
60 | spin_lock(&nfsd_devid_lock); | ||
61 | if (fhp->fh_export->ex_devid_map) | ||
62 | goto out_unlock; | ||
63 | |||
64 | for (i = 0; i < DEVID_HASH_SIZE; i++) { | ||
65 | list_for_each_entry(old, &nfsd_devid_hash[i], hash) { | ||
66 | if (old->fsid_type != fh->fh_fsid_type) | ||
67 | continue; | ||
68 | if (memcmp(old->fsid, fh->fh_fsid, | ||
69 | key_len(old->fsid_type))) | ||
70 | continue; | ||
71 | |||
72 | fhp->fh_export->ex_devid_map = old; | ||
73 | goto out_unlock; | ||
74 | } | ||
75 | } | ||
76 | |||
77 | map->idx = nfsd_devid_seq++; | ||
78 | list_add_tail_rcu(&map->hash, &nfsd_devid_hash[devid_hashfn(map->idx)]); | ||
79 | fhp->fh_export->ex_devid_map = map; | ||
80 | map = NULL; | ||
81 | |||
82 | out_unlock: | ||
83 | spin_unlock(&nfsd_devid_lock); | ||
84 | kfree(map); | ||
85 | } | ||
86 | |||
87 | struct nfsd4_deviceid_map * | ||
88 | nfsd4_find_devid_map(int idx) | ||
89 | { | ||
90 | struct nfsd4_deviceid_map *map, *ret = NULL; | ||
91 | |||
92 | rcu_read_lock(); | ||
93 | list_for_each_entry_rcu(map, &nfsd_devid_hash[devid_hashfn(idx)], hash) | ||
94 | if (map->idx == idx) | ||
95 | ret = map; | ||
96 | rcu_read_unlock(); | ||
97 | |||
98 | return ret; | ||
99 | } | ||
100 | |||
101 | int | ||
102 | nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp, | ||
103 | u32 device_generation) | ||
104 | { | ||
105 | if (!fhp->fh_export->ex_devid_map) { | ||
106 | nfsd4_alloc_devid_map(fhp); | ||
107 | if (!fhp->fh_export->ex_devid_map) | ||
108 | return -ENOMEM; | ||
109 | } | ||
110 | |||
111 | id->fsid_idx = fhp->fh_export->ex_devid_map->idx; | ||
112 | id->generation = device_generation; | ||
113 | id->pad = 0; | ||
114 | return 0; | ||
115 | } | ||
116 | |||
117 | void nfsd4_setup_layout_type(struct svc_export *exp) | ||
118 | { | ||
119 | struct super_block *sb = exp->ex_path.mnt->mnt_sb; | ||
120 | |||
121 | if (exp->ex_flags & NFSEXP_NOPNFS) | ||
122 | return; | ||
123 | |||
124 | if (sb->s_export_op->get_uuid && | ||
125 | sb->s_export_op->map_blocks && | ||
126 | sb->s_export_op->commit_blocks) | ||
127 | exp->ex_layout_type = LAYOUT_BLOCK_VOLUME; | ||
128 | } | ||
129 | |||
130 | static void | ||
131 | nfsd4_free_layout_stateid(struct nfs4_stid *stid) | ||
132 | { | ||
133 | struct nfs4_layout_stateid *ls = layoutstateid(stid); | ||
134 | struct nfs4_client *clp = ls->ls_stid.sc_client; | ||
135 | struct nfs4_file *fp = ls->ls_stid.sc_file; | ||
136 | |||
137 | trace_layoutstate_free(&ls->ls_stid.sc_stateid); | ||
138 | |||
139 | spin_lock(&clp->cl_lock); | ||
140 | list_del_init(&ls->ls_perclnt); | ||
141 | spin_unlock(&clp->cl_lock); | ||
142 | |||
143 | spin_lock(&fp->fi_lock); | ||
144 | list_del_init(&ls->ls_perfile); | ||
145 | spin_unlock(&fp->fi_lock); | ||
146 | |||
147 | vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls); | ||
148 | fput(ls->ls_file); | ||
149 | |||
150 | if (ls->ls_recalled) | ||
151 | atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls); | ||
152 | |||
153 | kmem_cache_free(nfs4_layout_stateid_cache, ls); | ||
154 | } | ||
155 | |||
/*
 * Install a whole-file read lease with FL_LAYOUT set on the file
 * backing @ls, so conflicting local or lease activity triggers
 * ->lm_break and thus a layout recall.
 *
 * Returns 0 on success or a negative errno.  On success the lease is
 * owned by @ls and is removed again in nfsd4_free_layout_stateid().
 */
static int
nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
{
	struct file_lock *fl;
	int status;

	fl = locks_alloc_lock();
	if (!fl)
		return -ENOMEM;
	locks_init_lock(fl);
	fl->fl_lmops = &nfsd4_layouts_lm_ops;
	fl->fl_flags = FL_LAYOUT;
	fl->fl_type = F_RDLCK;
	fl->fl_end = OFFSET_MAX;
	fl->fl_owner = ls;
	fl->fl_pid = current->tgid;
	fl->fl_file = ls->ls_file;

	status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL);
	if (status) {
		locks_free_lock(fl);
		return status;
	}
	/* vfs_setlease() consumes the lock and NULLs fl on success */
	BUG_ON(fl != NULL);
	return 0;
}
182 | |||
183 | static struct nfs4_layout_stateid * | ||
184 | nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, | ||
185 | struct nfs4_stid *parent, u32 layout_type) | ||
186 | { | ||
187 | struct nfs4_client *clp = cstate->clp; | ||
188 | struct nfs4_file *fp = parent->sc_file; | ||
189 | struct nfs4_layout_stateid *ls; | ||
190 | struct nfs4_stid *stp; | ||
191 | |||
192 | stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache); | ||
193 | if (!stp) | ||
194 | return NULL; | ||
195 | stp->sc_free = nfsd4_free_layout_stateid; | ||
196 | get_nfs4_file(fp); | ||
197 | stp->sc_file = fp; | ||
198 | |||
199 | ls = layoutstateid(stp); | ||
200 | INIT_LIST_HEAD(&ls->ls_perclnt); | ||
201 | INIT_LIST_HEAD(&ls->ls_perfile); | ||
202 | spin_lock_init(&ls->ls_lock); | ||
203 | INIT_LIST_HEAD(&ls->ls_layouts); | ||
204 | ls->ls_layout_type = layout_type; | ||
205 | nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops, | ||
206 | NFSPROC4_CLNT_CB_LAYOUT); | ||
207 | |||
208 | if (parent->sc_type == NFS4_DELEG_STID) | ||
209 | ls->ls_file = get_file(fp->fi_deleg_file); | ||
210 | else | ||
211 | ls->ls_file = find_any_file(fp); | ||
212 | BUG_ON(!ls->ls_file); | ||
213 | |||
214 | if (nfsd4_layout_setlease(ls)) { | ||
215 | put_nfs4_file(fp); | ||
216 | kmem_cache_free(nfs4_layout_stateid_cache, ls); | ||
217 | return NULL; | ||
218 | } | ||
219 | |||
220 | spin_lock(&clp->cl_lock); | ||
221 | stp->sc_type = NFS4_LAYOUT_STID; | ||
222 | list_add(&ls->ls_perclnt, &clp->cl_lo_states); | ||
223 | spin_unlock(&clp->cl_lock); | ||
224 | |||
225 | spin_lock(&fp->fi_lock); | ||
226 | list_add(&ls->ls_perfile, &fp->fi_lo_states); | ||
227 | spin_unlock(&fp->fi_lock); | ||
228 | |||
229 | trace_layoutstate_alloc(&ls->ls_stid.sc_stateid); | ||
230 | return ls; | ||
231 | } | ||
232 | |||
/*
 * Look up and validate the stateid supplied with a LAYOUT* operation.
 *
 * If @create is true (LAYOUTGET), an open/lock/delegation stateid is
 * also acceptable and becomes the parent of a freshly allocated layout
 * stateid.  On success *@lsp holds a referenced layout stateid.
 *
 * Returns nfs_ok, nfserr_jukebox on allocation failure, or
 * nfserr_bad_stateid on filehandle/generation/layout-type mismatch.
 */
__be32
nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
		struct nfsd4_compound_state *cstate, stateid_t *stateid,
		bool create, u32 layout_type, struct nfs4_layout_stateid **lsp)
{
	struct nfs4_layout_stateid *ls;
	struct nfs4_stid *stid;
	unsigned char typemask = NFS4_LAYOUT_STID;
	__be32 status;

	if (create)
		typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID);

	status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid,
			net_generic(SVC_NET(rqstp), nfsd_net_id));
	if (status)
		goto out;

	/* the stateid must belong to the current filehandle's file */
	if (!fh_match(&cstate->current_fh.fh_handle,
		      &stid->sc_file->fi_fhandle)) {
		status = nfserr_bad_stateid;
		goto out_put_stid;
	}

	if (stid->sc_type != NFS4_LAYOUT_STID) {
		/* first LAYOUTGET: derive a layout stateid from the parent */
		ls = nfsd4_alloc_layout_stateid(cstate, stid, layout_type);
		nfs4_put_stid(stid);

		status = nfserr_jukebox;
		if (!ls)
			goto out;
	} else {
		ls = container_of(stid, struct nfs4_layout_stateid, ls_stid);

		status = nfserr_bad_stateid;
		/* reject seqids from the future */
		if (stateid->si_generation > stid->sc_stateid.si_generation)
			goto out_put_stid;
		if (layout_type != ls->ls_layout_type)
			goto out_put_stid;
	}

	/* hand the stateid reference from the lookup over to the caller */
	*lsp = ls;
	return 0;

out_put_stid:
	nfs4_put_stid(stid);
out:
	return status;
}
282 | |||
/*
 * Issue a CB_LAYOUTRECALL for all layouts held on @ls, at most once
 * per stateid (ls_recalled latches under ls_lock).  fi_lo_recalls is
 * bumped so new LAYOUTGETs fail with RECALLCONFLICT until the recall
 * completes; a stateid reference is taken for the async callback and
 * dropped in its release op.
 */
static void
nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
{
	spin_lock(&ls->ls_lock);
	if (ls->ls_recalled)
		goto out_unlock;

	ls->ls_recalled = true;
	atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls);
	/* nothing outstanding to recall */
	if (list_empty(&ls->ls_layouts))
		goto out_unlock;

	trace_layout_recall(&ls->ls_stid.sc_stateid);

	/* reference for the callback; released in nfsd4_cb_layout_release */
	atomic_inc(&ls->ls_stid.sc_count);
	update_stateid(&ls->ls_stid.sc_stateid);
	memcpy(&ls->ls_recall_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t));
	nfsd4_run_cb(&ls->ls_recall);

out_unlock:
	spin_unlock(&ls->ls_lock);
}
305 | |||
306 | static inline u64 | ||
307 | layout_end(struct nfsd4_layout_seg *seg) | ||
308 | { | ||
309 | u64 end = seg->offset + seg->length; | ||
310 | return end >= seg->offset ? end : NFS4_MAX_UINT64; | ||
311 | } | ||
312 | |||
313 | static void | ||
314 | layout_update_len(struct nfsd4_layout_seg *lo, u64 end) | ||
315 | { | ||
316 | if (end == NFS4_MAX_UINT64) | ||
317 | lo->length = NFS4_MAX_UINT64; | ||
318 | else | ||
319 | lo->length = end - lo->offset; | ||
320 | } | ||
321 | |||
322 | static bool | ||
323 | layouts_overlapping(struct nfs4_layout *lo, struct nfsd4_layout_seg *s) | ||
324 | { | ||
325 | if (s->iomode != IOMODE_ANY && s->iomode != lo->lo_seg.iomode) | ||
326 | return false; | ||
327 | if (layout_end(&lo->lo_seg) <= s->offset) | ||
328 | return false; | ||
329 | if (layout_end(s) <= lo->lo_seg.offset) | ||
330 | return false; | ||
331 | return true; | ||
332 | } | ||
333 | |||
334 | static bool | ||
335 | layouts_try_merge(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *new) | ||
336 | { | ||
337 | if (lo->iomode != new->iomode) | ||
338 | return false; | ||
339 | if (layout_end(new) < lo->offset) | ||
340 | return false; | ||
341 | if (layout_end(lo) < new->offset) | ||
342 | return false; | ||
343 | |||
344 | lo->offset = min(lo->offset, new->offset); | ||
345 | layout_update_len(lo, max(layout_end(lo), layout_end(new))); | ||
346 | return true; | ||
347 | } | ||
348 | |||
/*
 * Recall every layout stateid on the file other than @ls.  Used before
 * granting @ls a layout: layouts held by other stateids conflict and
 * must be recalled first.
 *
 * Returns nfserr_recallconflict if any recall was issued, nfs_ok
 * otherwise.  Caller must hold fp->fi_lock.
 */
static __be32
nfsd4_recall_conflict(struct nfs4_layout_stateid *ls)
{
	struct nfs4_file *fp = ls->ls_stid.sc_file;
	struct nfs4_layout_stateid *l, *n;
	__be32 nfserr = nfs_ok;

	assert_spin_locked(&fp->fi_lock);

	list_for_each_entry_safe(l, n, &fp->fi_lo_states, ls_perfile) {
		if (l != ls) {
			nfsd4_recall_file_layout(l);
			nfserr = nfserr_recallconflict;
		}
	}

	return nfserr;
}
367 | |||
/*
 * Record the segment granted by LAYOUTGET on its layout stateid.
 *
 * Fast path: merge into an existing segment under fi_lock + ls_lock.
 * If no merge is possible, both locks are dropped to allocate a new
 * nfs4_layout with GFP_KERNEL, then retaken and the conflict/merge
 * checks redone before linking the new entry.  The stateid's seqid is
 * bumped and copied into the reply in either case.
 *
 * Returns nfs_ok, nfserr_recallconflict (conflicting layouts were
 * recalled) or nfserr_jukebox (allocation failure).
 */
__be32
nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
{
	struct nfsd4_layout_seg *seg = &lgp->lg_seg;
	struct nfs4_file *fp = ls->ls_stid.sc_file;
	struct nfs4_layout *lp, *new = NULL;
	__be32 nfserr;

	spin_lock(&fp->fi_lock);
	nfserr = nfsd4_recall_conflict(ls);
	if (nfserr)
		goto out;
	spin_lock(&ls->ls_lock);
	list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) {
		if (layouts_try_merge(&lp->lo_seg, seg))
			goto done;
	}
	spin_unlock(&ls->ls_lock);
	spin_unlock(&fp->fi_lock);

	/* allocate outside the spinlocks, then revalidate below */
	new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL);
	if (!new)
		return nfserr_jukebox;
	memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg));
	new->lo_state = ls;

	spin_lock(&fp->fi_lock);
	nfserr = nfsd4_recall_conflict(ls);
	if (nfserr)
		goto out;
	spin_lock(&ls->ls_lock);
	list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) {
		if (layouts_try_merge(&lp->lo_seg, seg))
			goto done;
	}

	/* each hashed layout holds a reference on the stateid */
	atomic_inc(&ls->ls_stid.sc_count);
	list_add_tail(&new->lo_perstate, &ls->ls_layouts);
	new = NULL;	/* ownership transferred; don't free below */
done:
	update_stateid(&ls->ls_stid.sc_stateid);
	memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t));
	spin_unlock(&ls->ls_lock);
out:
	spin_unlock(&fp->fi_lock);
	if (new)
		kmem_cache_free(nfs4_layout_cache, new);
	return nfserr;
}
417 | |||
418 | static void | ||
419 | nfsd4_free_layouts(struct list_head *reaplist) | ||
420 | { | ||
421 | while (!list_empty(reaplist)) { | ||
422 | struct nfs4_layout *lp = list_first_entry(reaplist, | ||
423 | struct nfs4_layout, lo_perstate); | ||
424 | |||
425 | list_del(&lp->lo_perstate); | ||
426 | nfs4_put_stid(&lp->lo_state->ls_stid); | ||
427 | kmem_cache_free(nfs4_layout_cache, lp); | ||
428 | } | ||
429 | } | ||
430 | |||
431 | static void | ||
432 | nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg, | ||
433 | struct list_head *reaplist) | ||
434 | { | ||
435 | struct nfsd4_layout_seg *lo = &lp->lo_seg; | ||
436 | u64 end = layout_end(lo); | ||
437 | |||
438 | if (seg->offset <= lo->offset) { | ||
439 | if (layout_end(seg) >= end) { | ||
440 | list_move_tail(&lp->lo_perstate, reaplist); | ||
441 | return; | ||
442 | } | ||
443 | end = seg->offset; | ||
444 | } else { | ||
445 | /* retain the whole layout segment on a split. */ | ||
446 | if (layout_end(seg) < end) { | ||
447 | dprintk("%s: split not supported\n", __func__); | ||
448 | return; | ||
449 | } | ||
450 | |||
451 | lo->offset = layout_end(seg); | ||
452 | } | ||
453 | |||
454 | layout_update_len(lo, end); | ||
455 | } | ||
456 | |||
/*
 * LAYOUTRETURN with RETURN_FILE: trim or remove every cached layout on
 * the file's layout stateid that overlaps the returned range.  If any
 * layouts remain, the stateid's seqid is bumped (when something was
 * actually trimmed) and returned with lrs_present set; otherwise the
 * now-empty stateid is unhashed.  Reaped layouts are freed after
 * dropping ls_lock.
 */
__be32
nfsd4_return_file_layouts(struct svc_rqst *rqstp,
		struct nfsd4_compound_state *cstate,
		struct nfsd4_layoutreturn *lrp)
{
	struct nfs4_layout_stateid *ls;
	struct nfs4_layout *lp, *n;
	LIST_HEAD(reaplist);
	__be32 nfserr;
	int found = 0;

	nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lrp->lr_sid,
						false, lrp->lr_layout_type,
						&ls);
	if (nfserr) {
		trace_layout_return_lookup_fail(&lrp->lr_sid);
		return nfserr;
	}

	spin_lock(&ls->ls_lock);
	list_for_each_entry_safe(lp, n, &ls->ls_layouts, lo_perstate) {
		if (layouts_overlapping(lp, &lrp->lr_seg)) {
			nfsd4_return_file_layout(lp, &lrp->lr_seg, &reaplist);
			found++;
		}
	}
	if (!list_empty(&ls->ls_layouts)) {
		/* only bump the seqid if something was returned */
		if (found) {
			update_stateid(&ls->ls_stid.sc_stateid);
			memcpy(&lrp->lr_sid, &ls->ls_stid.sc_stateid,
				sizeof(stateid_t));
		}
		lrp->lrs_present = 1;
	} else {
		/* last layout gone: retire the stateid */
		trace_layoutstate_unhash(&ls->ls_stid.sc_stateid);
		nfs4_unhash_stid(&ls->ls_stid);
		lrp->lrs_present = 0;
	}
	spin_unlock(&ls->ls_lock);

	nfs4_put_stid(&ls->ls_stid);
	nfsd4_free_layouts(&reaplist);
	return nfs_ok;
}
501 | |||
/*
 * LAYOUTRETURN with RETURN_FSID or RETURN_ALL: walk all of the
 * client's layout stateids (restricted to the current filehandle's
 * fsid for RETURN_FSID) and reap every layout matching the requested
 * iomode.  These return types never yield a layout stateid, so
 * lrs_present is always cleared.
 */
__be32
nfsd4_return_client_layouts(struct svc_rqst *rqstp,
		struct nfsd4_compound_state *cstate,
		struct nfsd4_layoutreturn *lrp)
{
	struct nfs4_layout_stateid *ls, *n;
	struct nfs4_client *clp = cstate->clp;
	struct nfs4_layout *lp, *t;
	LIST_HEAD(reaplist);

	lrp->lrs_present = 0;

	spin_lock(&clp->cl_lock);
	list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) {
		/* RETURN_FSID only affects stateids on the same fsid */
		if (lrp->lr_return_type == RETURN_FSID &&
		    !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle,
				   &cstate->current_fh.fh_handle))
			continue;

		spin_lock(&ls->ls_lock);
		list_for_each_entry_safe(lp, t, &ls->ls_layouts, lo_perstate) {
			if (lrp->lr_seg.iomode == IOMODE_ANY ||
			    lrp->lr_seg.iomode == lp->lo_seg.iomode)
				list_move_tail(&lp->lo_perstate, &reaplist);
		}
		spin_unlock(&ls->ls_lock);
	}
	spin_unlock(&clp->cl_lock);

	/* free outside cl_lock/ls_lock */
	nfsd4_free_layouts(&reaplist);
	return 0;
}
534 | |||
/*
 * Move every layout on @ls onto @reaplist for later freeing by
 * nfsd4_free_layouts(), outside of ls_lock.
 */
static void
nfsd4_return_all_layouts(struct nfs4_layout_stateid *ls,
		struct list_head *reaplist)
{
	spin_lock(&ls->ls_lock);
	list_splice_init(&ls->ls_layouts, reaplist);
	spin_unlock(&ls->ls_lock);
}
543 | |||
/*
 * Drop every layout held by @clp, e.g. on client expiry.  The layouts
 * are collected under cl_lock and freed afterwards.
 */
void
nfsd4_return_all_client_layouts(struct nfs4_client *clp)
{
	struct nfs4_layout_stateid *ls, *n;
	LIST_HEAD(reaplist);

	spin_lock(&clp->cl_lock);
	list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt)
		nfsd4_return_all_layouts(ls, &reaplist);
	spin_unlock(&clp->cl_lock);

	nfsd4_free_layouts(&reaplist);
}
557 | |||
/*
 * Drop every layout that client @clp holds on file @fp.  The layouts
 * are collected under fi_lock and freed afterwards.
 */
void
nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
{
	struct nfs4_layout_stateid *ls, *n;
	LIST_HEAD(reaplist);

	spin_lock(&fp->fi_lock);
	list_for_each_entry_safe(ls, n, &fp->fi_lo_states, ls_perfile) {
		if (ls->ls_stid.sc_client == clp)
			nfsd4_return_all_layouts(ls, &reaplist);
	}
	spin_unlock(&fp->fi_lock);

	nfsd4_free_layouts(&reaplist);
}
573 | |||
574 | static void | ||
575 | nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) | ||
576 | { | ||
577 | struct nfs4_client *clp = ls->ls_stid.sc_client; | ||
578 | char addr_str[INET6_ADDRSTRLEN]; | ||
579 | static char *envp[] = { | ||
580 | "HOME=/", | ||
581 | "TERM=linux", | ||
582 | "PATH=/sbin:/usr/sbin:/bin:/usr/bin", | ||
583 | NULL | ||
584 | }; | ||
585 | char *argv[8]; | ||
586 | int error; | ||
587 | |||
588 | rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); | ||
589 | |||
590 | nfsd4_cb_layout_fail(ls); | ||
591 | |||
592 | printk(KERN_WARNING | ||
593 | "nfsd: client %s failed to respond to layout recall. " | ||
594 | " Fencing..\n", addr_str); | ||
595 | |||
596 | argv[0] = "/sbin/nfsd-recall-failed"; | ||
597 | argv[1] = addr_str; | ||
598 | argv[2] = ls->ls_file->f_path.mnt->mnt_sb->s_id; | ||
599 | argv[3] = NULL; | ||
600 | |||
601 | error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); | ||
602 | if (error) { | ||
603 | printk(KERN_ERR "nfsd: fence failed for client %s: %d!\n", | ||
604 | addr_str, error); | ||
605 | } | ||
606 | } | ||
607 | |||
608 | static int | ||
609 | nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) | ||
610 | { | ||
611 | struct nfs4_layout_stateid *ls = | ||
612 | container_of(cb, struct nfs4_layout_stateid, ls_recall); | ||
613 | LIST_HEAD(reaplist); | ||
614 | |||
615 | switch (task->tk_status) { | ||
616 | case 0: | ||
617 | return 1; | ||
618 | case -NFS4ERR_NOMATCHING_LAYOUT: | ||
619 | trace_layout_recall_done(&ls->ls_stid.sc_stateid); | ||
620 | task->tk_status = 0; | ||
621 | return 1; | ||
622 | case -NFS4ERR_DELAY: | ||
623 | /* Poll the client until it's done with the layout */ | ||
624 | /* FIXME: cap number of retries. | ||
625 | * The pnfs standard states that we need to only expire | ||
626 | * the client after at-least "lease time" .eg lease-time * 2 | ||
627 | * when failing to communicate a recall | ||
628 | */ | ||
629 | rpc_delay(task, HZ/100); /* 10 mili-seconds */ | ||
630 | return 0; | ||
631 | default: | ||
632 | /* | ||
633 | * Unknown error or non-responding client, we'll need to fence. | ||
634 | */ | ||
635 | nfsd4_cb_layout_fail(ls); | ||
636 | return -1; | ||
637 | } | ||
638 | } | ||
639 | |||
/*
 * Release callback for CB_LAYOUTRECALL: drop all layouts covered by
 * the recall and the stateid reference taken when the callback was
 * queued in nfsd4_recall_file_layout().
 */
static void
nfsd4_cb_layout_release(struct nfsd4_callback *cb)
{
	struct nfs4_layout_stateid *ls =
		container_of(cb, struct nfs4_layout_stateid, ls_recall);
	LIST_HEAD(reaplist);

	trace_layout_recall_release(&ls->ls_stid.sc_stateid);

	nfsd4_return_all_layouts(ls, &reaplist);
	nfsd4_free_layouts(&reaplist);
	nfs4_put_stid(&ls->ls_stid);
}
653 | |||
/* Callback ops driving CB_LAYOUTRECALL completion and release. */
static struct nfsd4_callback_ops nfsd4_cb_layout_ops = {
	.done		= nfsd4_cb_layout_done,
	.release	= nfsd4_cb_layout_release,
};
658 | |||
/*
 * Lease-break callback for the FL_LAYOUT lease: local access to the
 * file conflicts with an outstanding layout, so recall it.  fl_owner
 * is the nfs4_layout_stateid set up in nfsd4_layout_setlease().
 */
static bool
nfsd4_layout_lm_break(struct file_lock *fl)
{
	/*
	 * We don't want the locks code to timeout the lease for us;
	 * we'll remove it ourself if a layout isn't returned
	 * in time:
	 */
	fl->fl_break_time = 0;
	nfsd4_recall_file_layout(fl->fl_owner);
	return false;
}
671 | |||
/*
 * Lease-change callback; only unlock transitions are expected for
 * layout leases, anything else indicates a logic error.
 */
static int
nfsd4_layout_lm_change(struct file_lock *onlist, int arg,
		struct list_head *dispose)
{
	BUG_ON(!(arg & F_UNLCK));
	return lease_modify(onlist, arg, dispose);
}
679 | |||
/* Lock-manager ops backing the FL_LAYOUT lease on layout files. */
static const struct lock_manager_operations nfsd4_layouts_lm_ops = {
	.lm_break	= nfsd4_layout_lm_break,
	.lm_change	= nfsd4_layout_lm_change,
};
684 | |||
685 | int | ||
686 | nfsd4_init_pnfs(void) | ||
687 | { | ||
688 | int i; | ||
689 | |||
690 | for (i = 0; i < DEVID_HASH_SIZE; i++) | ||
691 | INIT_LIST_HEAD(&nfsd_devid_hash[i]); | ||
692 | |||
693 | nfs4_layout_cache = kmem_cache_create("nfs4_layout", | ||
694 | sizeof(struct nfs4_layout), 0, 0, NULL); | ||
695 | if (!nfs4_layout_cache) | ||
696 | return -ENOMEM; | ||
697 | |||
698 | nfs4_layout_stateid_cache = kmem_cache_create("nfs4_layout_stateid", | ||
699 | sizeof(struct nfs4_layout_stateid), 0, 0, NULL); | ||
700 | if (!nfs4_layout_stateid_cache) { | ||
701 | kmem_cache_destroy(nfs4_layout_cache); | ||
702 | return -ENOMEM; | ||
703 | } | ||
704 | return 0; | ||
705 | } | ||
706 | |||
/*
 * Module teardown for pNFS support: destroy the slab caches and free
 * every cached device-ID mapping.
 */
void
nfsd4_exit_pnfs(void)
{
	int i;

	kmem_cache_destroy(nfs4_layout_cache);
	kmem_cache_destroy(nfs4_layout_stateid_cache);

	for (i = 0; i < DEVID_HASH_SIZE; i++) {
		struct nfsd4_deviceid_map *map, *n;

		list_for_each_entry_safe(map, n, &nfsd_devid_hash[i], hash)
			kfree(map);
	}
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ac71d13c69ef..d30bea8d0277 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -43,6 +43,8 @@ | |||
43 | #include "current_stateid.h" | 43 | #include "current_stateid.h" |
44 | #include "netns.h" | 44 | #include "netns.h" |
45 | #include "acl.h" | 45 | #include "acl.h" |
46 | #include "pnfs.h" | ||
47 | #include "trace.h" | ||
46 | 48 | ||
47 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | 49 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL |
48 | #include <linux/security.h> | 50 | #include <linux/security.h> |
@@ -1178,6 +1180,259 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1178 | return status == nfserr_same ? nfs_ok : status; | 1180 | return status == nfserr_same ? nfs_ok : status; |
1179 | } | 1181 | } |
1180 | 1182 | ||
#ifdef CONFIG_NFSD_PNFS
/*
 * Check that @exp supports pNFS with the requested layout type and
 * return the matching layout driver ops, or NULL when the export does
 * not support pNFS or uses a different layout type.
 */
static const struct nfsd4_layout_ops *
nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
{
	if (!exp->ex_layout_type) {
		dprintk("%s: export does not support pNFS\n", __func__);
		return NULL;
	}

	if (exp->ex_layout_type != layout_type) {
		dprintk("%s: layout type %d not supported\n",
			__func__, layout_type);
		return NULL;
	}

	return nfsd4_layout_ops[layout_type];
}
1200 | |||
1201 | static __be32 | ||
1202 | nfsd4_getdeviceinfo(struct svc_rqst *rqstp, | ||
1203 | struct nfsd4_compound_state *cstate, | ||
1204 | struct nfsd4_getdeviceinfo *gdp) | ||
1205 | { | ||
1206 | const struct nfsd4_layout_ops *ops; | ||
1207 | struct nfsd4_deviceid_map *map; | ||
1208 | struct svc_export *exp; | ||
1209 | __be32 nfserr; | ||
1210 | |||
1211 | dprintk("%s: layout_type %u dev_id [0x%llx:0x%x] maxcnt %u\n", | ||
1212 | __func__, | ||
1213 | gdp->gd_layout_type, | ||
1214 | gdp->gd_devid.fsid_idx, gdp->gd_devid.generation, | ||
1215 | gdp->gd_maxcount); | ||
1216 | |||
1217 | map = nfsd4_find_devid_map(gdp->gd_devid.fsid_idx); | ||
1218 | if (!map) { | ||
1219 | dprintk("%s: couldn't find device ID to export mapping!\n", | ||
1220 | __func__); | ||
1221 | return nfserr_noent; | ||
1222 | } | ||
1223 | |||
1224 | exp = rqst_exp_find(rqstp, map->fsid_type, map->fsid); | ||
1225 | if (IS_ERR(exp)) { | ||
1226 | dprintk("%s: could not find device id\n", __func__); | ||
1227 | return nfserr_noent; | ||
1228 | } | ||
1229 | |||
1230 | nfserr = nfserr_layoutunavailable; | ||
1231 | ops = nfsd4_layout_verify(exp, gdp->gd_layout_type); | ||
1232 | if (!ops) | ||
1233 | goto out; | ||
1234 | |||
1235 | nfserr = nfs_ok; | ||
1236 | if (gdp->gd_maxcount != 0) | ||
1237 | nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp); | ||
1238 | |||
1239 | gdp->gd_notify_types &= ops->notify_types; | ||
1240 | exp_put(exp); | ||
1241 | out: | ||
1242 | return nfserr; | ||
1243 | } | ||
1244 | |||
/*
 * LAYOUTGET: validate iomode, access rights, layout type and range,
 * ask the layout driver for a layout, and record the granted segment
 * on the (possibly freshly created) layout stateid.
 */
static __be32
nfsd4_layoutget(struct svc_rqst *rqstp,
		struct nfsd4_compound_state *cstate,
		struct nfsd4_layoutget *lgp)
{
	struct svc_fh *current_fh = &cstate->current_fh;
	const struct nfsd4_layout_ops *ops;
	struct nfs4_layout_stateid *ls;
	__be32 nfserr;
	int accmode;

	/* the requested iomode determines the access check below */
	switch (lgp->lg_seg.iomode) {
	case IOMODE_READ:
		accmode = NFSD_MAY_READ;
		break;
	case IOMODE_RW:
		accmode = NFSD_MAY_READ | NFSD_MAY_WRITE;
		break;
	default:
		dprintk("%s: invalid iomode %d\n",
			__func__, lgp->lg_seg.iomode);
		nfserr = nfserr_badiomode;
		goto out;
	}

	nfserr = fh_verify(rqstp, current_fh, 0, accmode);
	if (nfserr)
		goto out;

	nfserr = nfserr_layoutunavailable;
	ops = nfsd4_layout_verify(current_fh->fh_export, lgp->lg_layout_type);
	if (!ops)
		goto out;

	/*
	 * Verify minlength and range as per RFC5661:
	 * o If loga_length is less than loga_minlength,
	 *   the metadata server MUST return NFS4ERR_INVAL.
	 * o If the sum of loga_offset and loga_minlength exceeds
	 *   NFS4_UINT64_MAX, and loga_minlength is not
	 *   NFS4_UINT64_MAX, the error NFS4ERR_INVAL MUST result.
	 * o If the sum of loga_offset and loga_length exceeds
	 *   NFS4_UINT64_MAX, and loga_length is not NFS4_UINT64_MAX,
	 *   the error NFS4ERR_INVAL MUST result.
	 */
	nfserr = nfserr_inval;
	if (lgp->lg_seg.length < lgp->lg_minlength ||
	    (lgp->lg_minlength != NFS4_MAX_UINT64 &&
	     lgp->lg_minlength > NFS4_MAX_UINT64 - lgp->lg_seg.offset) ||
	    (lgp->lg_seg.length != NFS4_MAX_UINT64 &&
	     lgp->lg_seg.length > NFS4_MAX_UINT64 - lgp->lg_seg.offset))
		goto out;
	if (lgp->lg_seg.length == 0)
		goto out;

	nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lgp->lg_sid,
						true, lgp->lg_layout_type, &ls);
	if (nfserr) {
		trace_layout_get_lookup_fail(&lgp->lg_sid);
		goto out;
	}

	/* refuse new layouts while a recall is in flight on this file */
	nfserr = nfserr_recallconflict;
	if (atomic_read(&ls->ls_stid.sc_file->fi_lo_recalls))
		goto out_put_stid;

	nfserr = ops->proc_layoutget(current_fh->fh_dentry->d_inode,
				     current_fh, lgp);
	if (nfserr)
		goto out_put_stid;

	/* hash the granted segment and bump the stateid seqid */
	nfserr = nfsd4_insert_layout(lgp, ls);

out_put_stid:
	nfs4_put_stid(&ls->ls_stid);
out:
	return nfserr;
}
1323 | |||
/*
 * LAYOUTCOMMIT: make the client's writes through a layout visible.
 * Validates that the last-written byte lies inside the committed
 * segment (and, unless the client asserts a new offset, inside the
 * current file size), lets the layout driver commit, and reports any
 * resulting file-size change back to the client.
 */
static __be32
nfsd4_layoutcommit(struct svc_rqst *rqstp,
		struct nfsd4_compound_state *cstate,
		struct nfsd4_layoutcommit *lcp)
{
	const struct nfsd4_layout_seg *seg = &lcp->lc_seg;
	struct svc_fh *current_fh = &cstate->current_fh;
	const struct nfsd4_layout_ops *ops;
	loff_t new_size = lcp->lc_last_wr + 1;	/* last byte written -> size */
	struct inode *inode;
	struct nfs4_layout_stateid *ls;
	__be32 nfserr;

	nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_WRITE);
	if (nfserr)
		goto out;

	nfserr = nfserr_layoutunavailable;
	ops = nfsd4_layout_verify(current_fh->fh_export, lcp->lc_layout_type);
	if (!ops)
		goto out;
	inode = current_fh->fh_dentry->d_inode;

	nfserr = nfserr_inval;
	if (new_size <= seg->offset) {
		dprintk("pnfsd: last write before layout segment\n");
		goto out;
	}
	if (new_size > seg->offset + seg->length) {
		dprintk("pnfsd: last write beyond layout segment\n");
		goto out;
	}
	if (!lcp->lc_newoffset && new_size > i_size_read(inode)) {
		dprintk("pnfsd: layoutcommit beyond EOF\n");
		goto out;
	}

	nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid,
						false, lcp->lc_layout_type,
						&ls);
	if (nfserr) {
		trace_layout_commit_lookup_fail(&lcp->lc_sid);
		/* fixup error code as per RFC5661 */
		if (nfserr == nfserr_bad_stateid)
			nfserr = nfserr_badlayout;
		goto out;
	}

	nfserr = ops->proc_layoutcommit(inode, lcp);
	if (nfserr)
		goto out_put_stid;

	/* tell the client if the visible file size changed */
	if (new_size > i_size_read(inode)) {
		lcp->lc_size_chg = 1;
		lcp->lc_newsize = new_size;
	} else {
		lcp->lc_size_chg = 0;
	}

out_put_stid:
	nfs4_put_stid(&ls->ls_stid);
out:
	return nfserr;
}
1388 | |||
/*
 * LAYOUTRETURN: validate the filehandle, layout type, iomode and
 * return type, then dispatch to the per-file or per-client return
 * helpers in nfs4layouts.c.
 */
static __be32
nfsd4_layoutreturn(struct svc_rqst *rqstp,
		struct nfsd4_compound_state *cstate,
		struct nfsd4_layoutreturn *lrp)
{
	struct svc_fh *current_fh = &cstate->current_fh;
	__be32 nfserr;

	nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
	if (nfserr)
		goto out;

	nfserr = nfserr_layoutunavailable;
	if (!nfsd4_layout_verify(current_fh->fh_export, lrp->lr_layout_type))
		goto out;

	switch (lrp->lr_seg.iomode) {
	case IOMODE_READ:
	case IOMODE_RW:
	case IOMODE_ANY:
		break;
	default:
		dprintk("%s: invalid iomode %d\n", __func__,
			lrp->lr_seg.iomode);
		nfserr = nfserr_inval;
		goto out;
	}

	switch (lrp->lr_return_type) {
	case RETURN_FILE:
		nfserr = nfsd4_return_file_layouts(rqstp, cstate, lrp);
		break;
	case RETURN_FSID:
	case RETURN_ALL:
		nfserr = nfsd4_return_client_layouts(rqstp, cstate, lrp);
		break;
	default:
		dprintk("%s: invalid return_type %d\n", __func__,
			lrp->lr_return_type);
		nfserr = nfserr_inval;
		break;
	}
out:
	return nfserr;
}
#endif /* CONFIG_NFSD_PNFS */
1435 | |||
1181 | /* | 1436 | /* |
1182 | * NULL call. | 1437 | * NULL call. |
1183 | */ | 1438 | */ |
@@ -1679,6 +1934,36 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd | |||
1679 | op_encode_channel_attrs_maxsz) * sizeof(__be32); | 1934 | op_encode_channel_attrs_maxsz) * sizeof(__be32); |
1680 | } | 1935 | } |
1681 | 1936 | ||
#ifdef CONFIG_NFSD_PNFS
/*
 * At this stage we don't really know what layout driver will handle the request,
 * so we need to define an arbitrary upper bound here.
 */
#define MAX_LAYOUT_SIZE 128

/* Worst-case LAYOUTGET reply size, in bytes. */
static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
	return (op_encode_hdr_size +
		1 /* logr_return_on_close */ +
		op_encode_stateid_maxsz +
		1 /* nr of layouts */ +
		MAX_LAYOUT_SIZE) * sizeof(__be32);
}

/* Worst-case LAYOUTCOMMIT reply size, in bytes. */
static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
	return (op_encode_hdr_size +
		1 /* locr_newsize */ +
		2 /* ns_size */) * sizeof(__be32);
}

/* Worst-case LAYOUTRETURN reply size, in bytes. */
static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
	return (op_encode_hdr_size +
		1 /* lrs_stateid */ +
		op_encode_stateid_maxsz) * sizeof(__be32);
}
#endif /* CONFIG_NFSD_PNFS */
1966 | |||
1682 | static struct nfsd4_operation nfsd4_ops[] = { | 1967 | static struct nfsd4_operation nfsd4_ops[] = { |
1683 | [OP_ACCESS] = { | 1968 | [OP_ACCESS] = { |
1684 | .op_func = (nfsd4op_func)nfsd4_access, | 1969 | .op_func = (nfsd4op_func)nfsd4_access, |
@@ -1966,6 +2251,31 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1966 | .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, | 2251 | .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, |
1967 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | 2252 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, |
1968 | }, | 2253 | }, |
2254 | #ifdef CONFIG_NFSD_PNFS | ||
2255 | [OP_GETDEVICEINFO] = { | ||
2256 | .op_func = (nfsd4op_func)nfsd4_getdeviceinfo, | ||
2257 | .op_flags = ALLOWED_WITHOUT_FH, | ||
2258 | .op_name = "OP_GETDEVICEINFO", | ||
2259 | }, | ||
2260 | [OP_LAYOUTGET] = { | ||
2261 | .op_func = (nfsd4op_func)nfsd4_layoutget, | ||
2262 | .op_flags = OP_MODIFIES_SOMETHING, | ||
2263 | .op_name = "OP_LAYOUTGET", | ||
2264 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutget_rsize, | ||
2265 | }, | ||
2266 | [OP_LAYOUTCOMMIT] = { | ||
2267 | .op_func = (nfsd4op_func)nfsd4_layoutcommit, | ||
2268 | .op_flags = OP_MODIFIES_SOMETHING, | ||
2269 | .op_name = "OP_LAYOUTCOMMIT", | ||
2270 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutcommit_rsize, | ||
2271 | }, | ||
2272 | [OP_LAYOUTRETURN] = { | ||
2273 | .op_func = (nfsd4op_func)nfsd4_layoutreturn, | ||
2274 | .op_flags = OP_MODIFIES_SOMETHING, | ||
2275 | .op_name = "OP_LAYOUTRETURN", | ||
2276 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutreturn_rsize, | ||
2277 | }, | ||
2278 | #endif /* CONFIG_NFSD_PNFS */ | ||
1969 | 2279 | ||
1970 | /* NFSv4.2 operations */ | 2280 | /* NFSv4.2 operations */ |
1971 | [OP_ALLOCATE] = { | 2281 | [OP_ALLOCATE] = { |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c06a1ba80d73..f6b2a09f793f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include "current_stateid.h" | 48 | #include "current_stateid.h" |
49 | 49 | ||
50 | #include "netns.h" | 50 | #include "netns.h" |
51 | #include "pnfs.h" | ||
51 | 52 | ||
52 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 53 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
53 | 54 | ||
@@ -150,16 +151,6 @@ renew_client_locked(struct nfs4_client *clp) | |||
150 | clp->cl_time = get_seconds(); | 151 | clp->cl_time = get_seconds(); |
151 | } | 152 | } |
152 | 153 | ||
153 | static inline void | ||
154 | renew_client(struct nfs4_client *clp) | ||
155 | { | ||
156 | struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); | ||
157 | |||
158 | spin_lock(&nn->client_lock); | ||
159 | renew_client_locked(clp); | ||
160 | spin_unlock(&nn->client_lock); | ||
161 | } | ||
162 | |||
163 | static void put_client_renew_locked(struct nfs4_client *clp) | 154 | static void put_client_renew_locked(struct nfs4_client *clp) |
164 | { | 155 | { |
165 | struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); | 156 | struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); |
@@ -282,7 +273,7 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu) | |||
282 | kmem_cache_free(file_slab, fp); | 273 | kmem_cache_free(file_slab, fp); |
283 | } | 274 | } |
284 | 275 | ||
285 | static inline void | 276 | void |
286 | put_nfs4_file(struct nfs4_file *fi) | 277 | put_nfs4_file(struct nfs4_file *fi) |
287 | { | 278 | { |
288 | might_lock(&state_lock); | 279 | might_lock(&state_lock); |
@@ -295,12 +286,6 @@ put_nfs4_file(struct nfs4_file *fi) | |||
295 | } | 286 | } |
296 | } | 287 | } |
297 | 288 | ||
298 | static inline void | ||
299 | get_nfs4_file(struct nfs4_file *fi) | ||
300 | { | ||
301 | atomic_inc(&fi->fi_ref); | ||
302 | } | ||
303 | |||
304 | static struct file * | 289 | static struct file * |
305 | __nfs4_get_fd(struct nfs4_file *f, int oflag) | 290 | __nfs4_get_fd(struct nfs4_file *f, int oflag) |
306 | { | 291 | { |
@@ -358,7 +343,7 @@ find_readable_file(struct nfs4_file *f) | |||
358 | return ret; | 343 | return ret; |
359 | } | 344 | } |
360 | 345 | ||
361 | static struct file * | 346 | struct file * |
362 | find_any_file(struct nfs4_file *f) | 347 | find_any_file(struct nfs4_file *f) |
363 | { | 348 | { |
364 | struct file *ret; | 349 | struct file *ret; |
@@ -408,14 +393,6 @@ static unsigned int file_hashval(struct knfsd_fh *fh) | |||
408 | return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1); | 393 | return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1); |
409 | } | 394 | } |
410 | 395 | ||
411 | static bool nfsd_fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) | ||
412 | { | ||
413 | return fh1->fh_size == fh2->fh_size && | ||
414 | !memcmp(fh1->fh_base.fh_pad, | ||
415 | fh2->fh_base.fh_pad, | ||
416 | fh1->fh_size); | ||
417 | } | ||
418 | |||
419 | static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; | 396 | static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; |
420 | 397 | ||
421 | static void | 398 | static void |
@@ -494,7 +471,7 @@ static void nfs4_file_put_access(struct nfs4_file *fp, u32 access) | |||
494 | __nfs4_file_put_access(fp, O_RDONLY); | 471 | __nfs4_file_put_access(fp, O_RDONLY); |
495 | } | 472 | } |
496 | 473 | ||
497 | static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, | 474 | struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, |
498 | struct kmem_cache *slab) | 475 | struct kmem_cache *slab) |
499 | { | 476 | { |
500 | struct nfs4_stid *stid; | 477 | struct nfs4_stid *stid; |
@@ -688,17 +665,17 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp) | |||
688 | struct file *filp = NULL; | 665 | struct file *filp = NULL; |
689 | 666 | ||
690 | spin_lock(&fp->fi_lock); | 667 | spin_lock(&fp->fi_lock); |
691 | if (fp->fi_deleg_file && atomic_dec_and_test(&fp->fi_delegees)) | 668 | if (fp->fi_deleg_file && --fp->fi_delegees == 0) |
692 | swap(filp, fp->fi_deleg_file); | 669 | swap(filp, fp->fi_deleg_file); |
693 | spin_unlock(&fp->fi_lock); | 670 | spin_unlock(&fp->fi_lock); |
694 | 671 | ||
695 | if (filp) { | 672 | if (filp) { |
696 | vfs_setlease(filp, F_UNLCK, NULL, NULL); | 673 | vfs_setlease(filp, F_UNLCK, NULL, (void **)&fp); |
697 | fput(filp); | 674 | fput(filp); |
698 | } | 675 | } |
699 | } | 676 | } |
700 | 677 | ||
701 | static void unhash_stid(struct nfs4_stid *s) | 678 | void nfs4_unhash_stid(struct nfs4_stid *s) |
702 | { | 679 | { |
703 | s->sc_type = 0; | 680 | s->sc_type = 0; |
704 | } | 681 | } |
@@ -1006,7 +983,7 @@ static void unhash_lock_stateid(struct nfs4_ol_stateid *stp) | |||
1006 | 983 | ||
1007 | list_del_init(&stp->st_locks); | 984 | list_del_init(&stp->st_locks); |
1008 | unhash_ol_stateid(stp); | 985 | unhash_ol_stateid(stp); |
1009 | unhash_stid(&stp->st_stid); | 986 | nfs4_unhash_stid(&stp->st_stid); |
1010 | } | 987 | } |
1011 | 988 | ||
1012 | static void release_lock_stateid(struct nfs4_ol_stateid *stp) | 989 | static void release_lock_stateid(struct nfs4_ol_stateid *stp) |
@@ -1518,7 +1495,12 @@ unhash_session(struct nfsd4_session *ses) | |||
1518 | static int | 1495 | static int |
1519 | STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) | 1496 | STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) |
1520 | { | 1497 | { |
1521 | if (clid->cl_boot == nn->boot_time) | 1498 | /* |
1499 | * We're assuming the clid was not given out from a boot | ||
1500 | * precisely 2^32 (about 136 years) before this one. That seems | ||
1501 | * a safe assumption: | ||
1502 | */ | ||
1503 | if (clid->cl_boot == (u32)nn->boot_time) | ||
1522 | return 0; | 1504 | return 0; |
1523 | dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", | 1505 | dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", |
1524 | clid->cl_boot, clid->cl_id, nn->boot_time); | 1506 | clid->cl_boot, clid->cl_id, nn->boot_time); |
@@ -1558,6 +1540,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) | |||
1558 | INIT_LIST_HEAD(&clp->cl_lru); | 1540 | INIT_LIST_HEAD(&clp->cl_lru); |
1559 | INIT_LIST_HEAD(&clp->cl_callbacks); | 1541 | INIT_LIST_HEAD(&clp->cl_callbacks); |
1560 | INIT_LIST_HEAD(&clp->cl_revoked); | 1542 | INIT_LIST_HEAD(&clp->cl_revoked); |
1543 | #ifdef CONFIG_NFSD_PNFS | ||
1544 | INIT_LIST_HEAD(&clp->cl_lo_states); | ||
1545 | #endif | ||
1561 | spin_lock_init(&clp->cl_lock); | 1546 | spin_lock_init(&clp->cl_lock); |
1562 | rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); | 1547 | rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); |
1563 | return clp; | 1548 | return clp; |
@@ -1662,6 +1647,7 @@ __destroy_client(struct nfs4_client *clp) | |||
1662 | nfs4_get_stateowner(&oo->oo_owner); | 1647 | nfs4_get_stateowner(&oo->oo_owner); |
1663 | release_openowner(oo); | 1648 | release_openowner(oo); |
1664 | } | 1649 | } |
1650 | nfsd4_return_all_client_layouts(clp); | ||
1665 | nfsd4_shutdown_callback(clp); | 1651 | nfsd4_shutdown_callback(clp); |
1666 | if (clp->cl_cb_conn.cb_xprt) | 1652 | if (clp->cl_cb_conn.cb_xprt) |
1667 | svc_xprt_put(clp->cl_cb_conn.cb_xprt); | 1653 | svc_xprt_put(clp->cl_cb_conn.cb_xprt); |
@@ -2145,8 +2131,11 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, | |||
2145 | static void | 2131 | static void |
2146 | nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) | 2132 | nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) |
2147 | { | 2133 | { |
2148 | /* pNFS is not supported */ | 2134 | #ifdef CONFIG_NFSD_PNFS |
2135 | new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS; | ||
2136 | #else | ||
2149 | new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS; | 2137 | new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS; |
2138 | #endif | ||
2150 | 2139 | ||
2151 | /* Referrals are supported, Migration is not. */ | 2140 | /* Referrals are supported, Migration is not. */ |
2152 | new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER; | 2141 | new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER; |
@@ -3074,6 +3063,10 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, | |||
3074 | fp->fi_share_deny = 0; | 3063 | fp->fi_share_deny = 0; |
3075 | memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); | 3064 | memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); |
3076 | memset(fp->fi_access, 0, sizeof(fp->fi_access)); | 3065 | memset(fp->fi_access, 0, sizeof(fp->fi_access)); |
3066 | #ifdef CONFIG_NFSD_PNFS | ||
3067 | INIT_LIST_HEAD(&fp->fi_lo_states); | ||
3068 | atomic_set(&fp->fi_lo_recalls, 0); | ||
3069 | #endif | ||
3077 | hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); | 3070 | hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); |
3078 | } | 3071 | } |
3079 | 3072 | ||
@@ -3300,7 +3293,7 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval) | |||
3300 | struct nfs4_file *fp; | 3293 | struct nfs4_file *fp; |
3301 | 3294 | ||
3302 | hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) { | 3295 | hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) { |
3303 | if (nfsd_fh_match(&fp->fi_fhandle, fh)) { | 3296 | if (fh_match(&fp->fi_fhandle, fh)) { |
3304 | if (atomic_inc_not_zero(&fp->fi_ref)) | 3297 | if (atomic_inc_not_zero(&fp->fi_ref)) |
3305 | return fp; | 3298 | return fp; |
3306 | } | 3299 | } |
@@ -3308,7 +3301,7 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval) | |||
3308 | return NULL; | 3301 | return NULL; |
3309 | } | 3302 | } |
3310 | 3303 | ||
3311 | static struct nfs4_file * | 3304 | struct nfs4_file * |
3312 | find_file(struct knfsd_fh *fh) | 3305 | find_file(struct knfsd_fh *fh) |
3313 | { | 3306 | { |
3314 | struct nfs4_file *fp; | 3307 | struct nfs4_file *fp; |
@@ -3477,7 +3470,8 @@ nfsd_break_deleg_cb(struct file_lock *fl) | |||
3477 | } | 3470 | } |
3478 | 3471 | ||
3479 | static int | 3472 | static int |
3480 | nfsd_change_deleg_cb(struct file_lock **onlist, int arg, struct list_head *dispose) | 3473 | nfsd_change_deleg_cb(struct file_lock *onlist, int arg, |
3474 | struct list_head *dispose) | ||
3481 | { | 3475 | { |
3482 | if (arg & F_UNLCK) | 3476 | if (arg & F_UNLCK) |
3483 | return lease_modify(onlist, arg, dispose); | 3477 | return lease_modify(onlist, arg, dispose); |
@@ -3855,12 +3849,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp) | |||
3855 | /* Race breaker */ | 3849 | /* Race breaker */ |
3856 | if (fp->fi_deleg_file) { | 3850 | if (fp->fi_deleg_file) { |
3857 | status = 0; | 3851 | status = 0; |
3858 | atomic_inc(&fp->fi_delegees); | 3852 | ++fp->fi_delegees; |
3859 | hash_delegation_locked(dp, fp); | 3853 | hash_delegation_locked(dp, fp); |
3860 | goto out_unlock; | 3854 | goto out_unlock; |
3861 | } | 3855 | } |
3862 | fp->fi_deleg_file = filp; | 3856 | fp->fi_deleg_file = filp; |
3863 | atomic_set(&fp->fi_delegees, 1); | 3857 | fp->fi_delegees = 1; |
3864 | hash_delegation_locked(dp, fp); | 3858 | hash_delegation_locked(dp, fp); |
3865 | spin_unlock(&fp->fi_lock); | 3859 | spin_unlock(&fp->fi_lock); |
3866 | spin_unlock(&state_lock); | 3860 | spin_unlock(&state_lock); |
@@ -3901,7 +3895,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, | |||
3901 | status = -EAGAIN; | 3895 | status = -EAGAIN; |
3902 | goto out_unlock; | 3896 | goto out_unlock; |
3903 | } | 3897 | } |
3904 | atomic_inc(&fp->fi_delegees); | 3898 | ++fp->fi_delegees; |
3905 | hash_delegation_locked(dp, fp); | 3899 | hash_delegation_locked(dp, fp); |
3906 | status = 0; | 3900 | status = 0; |
3907 | out_unlock: | 3901 | out_unlock: |
@@ -4294,7 +4288,7 @@ laundromat_main(struct work_struct *laundry) | |||
4294 | 4288 | ||
4295 | static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) | 4289 | static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) |
4296 | { | 4290 | { |
4297 | if (!nfsd_fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle)) | 4291 | if (!fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle)) |
4298 | return nfserr_bad_stateid; | 4292 | return nfserr_bad_stateid; |
4299 | return nfs_ok; | 4293 | return nfs_ok; |
4300 | } | 4294 | } |
@@ -4445,7 +4439,7 @@ out_unlock: | |||
4445 | return status; | 4439 | return status; |
4446 | } | 4440 | } |
4447 | 4441 | ||
4448 | static __be32 | 4442 | __be32 |
4449 | nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, | 4443 | nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, |
4450 | stateid_t *stateid, unsigned char typemask, | 4444 | stateid_t *stateid, unsigned char typemask, |
4451 | struct nfs4_stid **s, struct nfsd_net *nn) | 4445 | struct nfs4_stid **s, struct nfsd_net *nn) |
@@ -4859,6 +4853,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4859 | update_stateid(&stp->st_stid.sc_stateid); | 4853 | update_stateid(&stp->st_stid.sc_stateid); |
4860 | memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); | 4854 | memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); |
4861 | 4855 | ||
4856 | nfsd4_return_all_file_layouts(stp->st_stateowner->so_client, | ||
4857 | stp->st_stid.sc_file); | ||
4858 | |||
4862 | nfsd4_close_open_stateid(stp); | 4859 | nfsd4_close_open_stateid(stp); |
4863 | 4860 | ||
4864 | /* put reference from nfs4_preprocess_seqid_op */ | 4861 | /* put reference from nfs4_preprocess_seqid_op */ |
@@ -5556,10 +5553,11 @@ out_nfserr: | |||
5556 | static bool | 5553 | static bool |
5557 | check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) | 5554 | check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) |
5558 | { | 5555 | { |
5559 | struct file_lock **flpp; | 5556 | struct file_lock *fl; |
5560 | int status = false; | 5557 | int status = false; |
5561 | struct file *filp = find_any_file(fp); | 5558 | struct file *filp = find_any_file(fp); |
5562 | struct inode *inode; | 5559 | struct inode *inode; |
5560 | struct file_lock_context *flctx; | ||
5563 | 5561 | ||
5564 | if (!filp) { | 5562 | if (!filp) { |
5565 | /* Any valid lock stateid should have some sort of access */ | 5563 | /* Any valid lock stateid should have some sort of access */ |
@@ -5568,15 +5566,18 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) | |||
5568 | } | 5566 | } |
5569 | 5567 | ||
5570 | inode = file_inode(filp); | 5568 | inode = file_inode(filp); |
5569 | flctx = inode->i_flctx; | ||
5571 | 5570 | ||
5572 | spin_lock(&inode->i_lock); | 5571 | if (flctx && !list_empty_careful(&flctx->flc_posix)) { |
5573 | for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { | 5572 | spin_lock(&flctx->flc_lock); |
5574 | if ((*flpp)->fl_owner == (fl_owner_t)lowner) { | 5573 | list_for_each_entry(fl, &flctx->flc_posix, fl_list) { |
5575 | status = true; | 5574 | if (fl->fl_owner == (fl_owner_t)lowner) { |
5576 | break; | 5575 | status = true; |
5576 | break; | ||
5577 | } | ||
5577 | } | 5578 | } |
5579 | spin_unlock(&flctx->flc_lock); | ||
5578 | } | 5580 | } |
5579 | spin_unlock(&inode->i_lock); | ||
5580 | fput(filp); | 5581 | fput(filp); |
5581 | return status; | 5582 | return status; |
5582 | } | 5583 | } |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 15f7b73e0c0f..df5e66caf100 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include "state.h" | 47 | #include "state.h" |
48 | #include "cache.h" | 48 | #include "cache.h" |
49 | #include "netns.h" | 49 | #include "netns.h" |
50 | #include "pnfs.h" | ||
50 | 51 | ||
51 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | 52 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL |
52 | #include <linux/security.h> | 53 | #include <linux/security.h> |
@@ -234,6 +235,26 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) | |||
234 | return ret; | 235 | return ret; |
235 | } | 236 | } |
236 | 237 | ||
238 | /* | ||
239 | * We require the high 32 bits of 'seconds' to be 0, and | ||
240 | * we ignore all 32 bits of 'nseconds'. | ||
241 | */ | ||
242 | static __be32 | ||
243 | nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec *tv) | ||
244 | { | ||
245 | DECODE_HEAD; | ||
246 | u64 sec; | ||
247 | |||
248 | READ_BUF(12); | ||
249 | p = xdr_decode_hyper(p, &sec); | ||
250 | tv->tv_sec = sec; | ||
251 | tv->tv_nsec = be32_to_cpup(p++); | ||
252 | if (tv->tv_nsec >= (u32)1000000000) | ||
253 | return nfserr_inval; | ||
254 | |||
255 | DECODE_TAIL; | ||
256 | } | ||
257 | |||
237 | static __be32 | 258 | static __be32 |
238 | nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) | 259 | nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) |
239 | { | 260 | { |
@@ -267,7 +288,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, | |||
267 | { | 288 | { |
268 | int expected_len, len = 0; | 289 | int expected_len, len = 0; |
269 | u32 dummy32; | 290 | u32 dummy32; |
270 | u64 sec; | ||
271 | char *buf; | 291 | char *buf; |
272 | 292 | ||
273 | DECODE_HEAD; | 293 | DECODE_HEAD; |
@@ -358,15 +378,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, | |||
358 | dummy32 = be32_to_cpup(p++); | 378 | dummy32 = be32_to_cpup(p++); |
359 | switch (dummy32) { | 379 | switch (dummy32) { |
360 | case NFS4_SET_TO_CLIENT_TIME: | 380 | case NFS4_SET_TO_CLIENT_TIME: |
361 | /* We require the high 32 bits of 'seconds' to be 0, and we ignore | ||
362 | all 32 bits of 'nseconds'. */ | ||
363 | READ_BUF(12); | ||
364 | len += 12; | 381 | len += 12; |
365 | p = xdr_decode_hyper(p, &sec); | 382 | status = nfsd4_decode_time(argp, &iattr->ia_atime); |
366 | iattr->ia_atime.tv_sec = (time_t)sec; | 383 | if (status) |
367 | iattr->ia_atime.tv_nsec = be32_to_cpup(p++); | 384 | return status; |
368 | if (iattr->ia_atime.tv_nsec >= (u32)1000000000) | ||
369 | return nfserr_inval; | ||
370 | iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); | 385 | iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); |
371 | break; | 386 | break; |
372 | case NFS4_SET_TO_SERVER_TIME: | 387 | case NFS4_SET_TO_SERVER_TIME: |
@@ -382,15 +397,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, | |||
382 | dummy32 = be32_to_cpup(p++); | 397 | dummy32 = be32_to_cpup(p++); |
383 | switch (dummy32) { | 398 | switch (dummy32) { |
384 | case NFS4_SET_TO_CLIENT_TIME: | 399 | case NFS4_SET_TO_CLIENT_TIME: |
385 | /* We require the high 32 bits of 'seconds' to be 0, and we ignore | ||
386 | all 32 bits of 'nseconds'. */ | ||
387 | READ_BUF(12); | ||
388 | len += 12; | 400 | len += 12; |
389 | p = xdr_decode_hyper(p, &sec); | 401 | status = nfsd4_decode_time(argp, &iattr->ia_mtime); |
390 | iattr->ia_mtime.tv_sec = sec; | 402 | if (status) |
391 | iattr->ia_mtime.tv_nsec = be32_to_cpup(p++); | 403 | return status; |
392 | if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) | ||
393 | return nfserr_inval; | ||
394 | iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); | 404 | iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); |
395 | break; | 405 | break; |
396 | case NFS4_SET_TO_SERVER_TIME: | 406 | case NFS4_SET_TO_SERVER_TIME: |
@@ -1513,6 +1523,127 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str | |||
1513 | DECODE_TAIL; | 1523 | DECODE_TAIL; |
1514 | } | 1524 | } |
1515 | 1525 | ||
1526 | #ifdef CONFIG_NFSD_PNFS | ||
1527 | static __be32 | ||
1528 | nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, | ||
1529 | struct nfsd4_getdeviceinfo *gdev) | ||
1530 | { | ||
1531 | DECODE_HEAD; | ||
1532 | u32 num, i; | ||
1533 | |||
1534 | READ_BUF(sizeof(struct nfsd4_deviceid) + 3 * 4); | ||
1535 | COPYMEM(&gdev->gd_devid, sizeof(struct nfsd4_deviceid)); | ||
1536 | gdev->gd_layout_type = be32_to_cpup(p++); | ||
1537 | gdev->gd_maxcount = be32_to_cpup(p++); | ||
1538 | num = be32_to_cpup(p++); | ||
1539 | if (num) { | ||
1540 | READ_BUF(4 * num); | ||
1541 | gdev->gd_notify_types = be32_to_cpup(p++); | ||
1542 | for (i = 1; i < num; i++) { | ||
1543 | if (be32_to_cpup(p++)) { | ||
1544 | status = nfserr_inval; | ||
1545 | goto out; | ||
1546 | } | ||
1547 | } | ||
1548 | } | ||
1549 | DECODE_TAIL; | ||
1550 | } | ||
1551 | |||
1552 | static __be32 | ||
1553 | nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, | ||
1554 | struct nfsd4_layoutget *lgp) | ||
1555 | { | ||
1556 | DECODE_HEAD; | ||
1557 | |||
1558 | READ_BUF(36); | ||
1559 | lgp->lg_signal = be32_to_cpup(p++); | ||
1560 | lgp->lg_layout_type = be32_to_cpup(p++); | ||
1561 | lgp->lg_seg.iomode = be32_to_cpup(p++); | ||
1562 | p = xdr_decode_hyper(p, &lgp->lg_seg.offset); | ||
1563 | p = xdr_decode_hyper(p, &lgp->lg_seg.length); | ||
1564 | p = xdr_decode_hyper(p, &lgp->lg_minlength); | ||
1565 | nfsd4_decode_stateid(argp, &lgp->lg_sid); | ||
1566 | READ_BUF(4); | ||
1567 | lgp->lg_maxcount = be32_to_cpup(p++); | ||
1568 | |||
1569 | DECODE_TAIL; | ||
1570 | } | ||
1571 | |||
1572 | static __be32 | ||
1573 | nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, | ||
1574 | struct nfsd4_layoutcommit *lcp) | ||
1575 | { | ||
1576 | DECODE_HEAD; | ||
1577 | u32 timechange; | ||
1578 | |||
1579 | READ_BUF(20); | ||
1580 | p = xdr_decode_hyper(p, &lcp->lc_seg.offset); | ||
1581 | p = xdr_decode_hyper(p, &lcp->lc_seg.length); | ||
1582 | lcp->lc_reclaim = be32_to_cpup(p++); | ||
1583 | nfsd4_decode_stateid(argp, &lcp->lc_sid); | ||
1584 | READ_BUF(4); | ||
1585 | lcp->lc_newoffset = be32_to_cpup(p++); | ||
1586 | if (lcp->lc_newoffset) { | ||
1587 | READ_BUF(8); | ||
1588 | p = xdr_decode_hyper(p, &lcp->lc_last_wr); | ||
1589 | } else | ||
1590 | lcp->lc_last_wr = 0; | ||
1591 | READ_BUF(4); | ||
1592 | timechange = be32_to_cpup(p++); | ||
1593 | if (timechange) { | ||
1594 | status = nfsd4_decode_time(argp, &lcp->lc_mtime); | ||
1595 | if (status) | ||
1596 | return status; | ||
1597 | } else { | ||
1598 | lcp->lc_mtime.tv_nsec = UTIME_NOW; | ||
1599 | } | ||
1600 | READ_BUF(8); | ||
1601 | lcp->lc_layout_type = be32_to_cpup(p++); | ||
1602 | |||
1603 | /* | ||
1604 | * Save the layout update in XDR format and let the layout driver deal | ||
1605 | * with it later. | ||
1606 | */ | ||
1607 | lcp->lc_up_len = be32_to_cpup(p++); | ||
1608 | if (lcp->lc_up_len > 0) { | ||
1609 | READ_BUF(lcp->lc_up_len); | ||
1610 | READMEM(lcp->lc_up_layout, lcp->lc_up_len); | ||
1611 | } | ||
1612 | |||
1613 | DECODE_TAIL; | ||
1614 | } | ||
1615 | |||
1616 | static __be32 | ||
1617 | nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, | ||
1618 | struct nfsd4_layoutreturn *lrp) | ||
1619 | { | ||
1620 | DECODE_HEAD; | ||
1621 | |||
1622 | READ_BUF(16); | ||
1623 | lrp->lr_reclaim = be32_to_cpup(p++); | ||
1624 | lrp->lr_layout_type = be32_to_cpup(p++); | ||
1625 | lrp->lr_seg.iomode = be32_to_cpup(p++); | ||
1626 | lrp->lr_return_type = be32_to_cpup(p++); | ||
1627 | if (lrp->lr_return_type == RETURN_FILE) { | ||
1628 | READ_BUF(16); | ||
1629 | p = xdr_decode_hyper(p, &lrp->lr_seg.offset); | ||
1630 | p = xdr_decode_hyper(p, &lrp->lr_seg.length); | ||
1631 | nfsd4_decode_stateid(argp, &lrp->lr_sid); | ||
1632 | READ_BUF(4); | ||
1633 | lrp->lrf_body_len = be32_to_cpup(p++); | ||
1634 | if (lrp->lrf_body_len > 0) { | ||
1635 | READ_BUF(lrp->lrf_body_len); | ||
1636 | READMEM(lrp->lrf_body, lrp->lrf_body_len); | ||
1637 | } | ||
1638 | } else { | ||
1639 | lrp->lr_seg.offset = 0; | ||
1640 | lrp->lr_seg.length = NFS4_MAX_UINT64; | ||
1641 | } | ||
1642 | |||
1643 | DECODE_TAIL; | ||
1644 | } | ||
1645 | #endif /* CONFIG_NFSD_PNFS */ | ||
1646 | |||
1516 | static __be32 | 1647 | static __be32 |
1517 | nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, | 1648 | nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, |
1518 | struct nfsd4_fallocate *fallocate) | 1649 | struct nfsd4_fallocate *fallocate) |
@@ -1607,11 +1738,19 @@ static nfsd4_dec nfsd4_dec_ops[] = { | |||
1607 | [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, | 1738 | [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, |
1608 | [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, | 1739 | [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, |
1609 | [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, | 1740 | [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, |
1741 | #ifdef CONFIG_NFSD_PNFS | ||
1742 | [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo, | ||
1743 | [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1744 | [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit, | ||
1745 | [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget, | ||
1746 | [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn, | ||
1747 | #else | ||
1610 | [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, | 1748 | [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, |
1611 | [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, | 1749 | [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, |
1612 | [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, | 1750 | [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, |
1613 | [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, | 1751 | [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, |
1614 | [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, | 1752 | [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, |
1753 | #endif | ||
1615 | [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, | 1754 | [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, |
1616 | [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, | 1755 | [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, |
1617 | [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, | 1756 | [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, |
@@ -2539,6 +2678,30 @@ out_acl: | |||
2539 | get_parent_attributes(exp, &stat); | 2678 | get_parent_attributes(exp, &stat); |
2540 | p = xdr_encode_hyper(p, stat.ino); | 2679 | p = xdr_encode_hyper(p, stat.ino); |
2541 | } | 2680 | } |
2681 | #ifdef CONFIG_NFSD_PNFS | ||
2682 | if ((bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) || | ||
2683 | (bmval2 & FATTR4_WORD2_LAYOUT_TYPES)) { | ||
2684 | if (exp->ex_layout_type) { | ||
2685 | p = xdr_reserve_space(xdr, 8); | ||
2686 | if (!p) | ||
2687 | goto out_resource; | ||
2688 | *p++ = cpu_to_be32(1); | ||
2689 | *p++ = cpu_to_be32(exp->ex_layout_type); | ||
2690 | } else { | ||
2691 | p = xdr_reserve_space(xdr, 4); | ||
2692 | if (!p) | ||
2693 | goto out_resource; | ||
2694 | *p++ = cpu_to_be32(0); | ||
2695 | } | ||
2696 | } | ||
2697 | |||
2698 | if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) { | ||
2699 | p = xdr_reserve_space(xdr, 4); | ||
2700 | if (!p) | ||
2701 | goto out_resource; | ||
2702 | *p++ = cpu_to_be32(stat.blksize); | ||
2703 | } | ||
2704 | #endif /* CONFIG_NFSD_PNFS */ | ||
2542 | if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { | 2705 | if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { |
2543 | status = nfsd4_encode_security_label(xdr, rqstp, context, | 2706 | status = nfsd4_encode_security_label(xdr, rqstp, context, |
2544 | contextlen); | 2707 | contextlen); |
@@ -2768,16 +2931,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, | |||
2768 | if (entry_bytes > cd->rd_maxcount) | 2931 | if (entry_bytes > cd->rd_maxcount) |
2769 | goto fail; | 2932 | goto fail; |
2770 | cd->rd_maxcount -= entry_bytes; | 2933 | cd->rd_maxcount -= entry_bytes; |
2771 | if (!cd->rd_dircount) | ||
2772 | goto fail; | ||
2773 | /* | 2934 | /* |
2774 | * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so | 2935 | * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so |
2775 | * let's always let through the first entry, at least: | 2936 | * let's always let through the first entry, at least: |
2776 | */ | 2937 | */ |
2777 | name_and_cookie = 4 * XDR_QUADLEN(namlen) + 8; | 2938 | if (!cd->rd_dircount) |
2939 | goto fail; | ||
2940 | name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8; | ||
2778 | if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) | 2941 | if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) |
2779 | goto fail; | 2942 | goto fail; |
2780 | cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); | 2943 | cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); |
2944 | |||
2781 | cd->cookie_offset = cookie_offset; | 2945 | cd->cookie_offset = cookie_offset; |
2782 | skip_entry: | 2946 | skip_entry: |
2783 | cd->common.err = nfs_ok; | 2947 | cd->common.err = nfs_ok; |
@@ -3814,6 +3978,156 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, | |||
3814 | return nfserr; | 3978 | return nfserr; |
3815 | } | 3979 | } |
3816 | 3980 | ||
3981 | #ifdef CONFIG_NFSD_PNFS | ||
3982 | static __be32 | ||
3983 | nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, | ||
3984 | struct nfsd4_getdeviceinfo *gdev) | ||
3985 | { | ||
3986 | struct xdr_stream *xdr = &resp->xdr; | ||
3987 | const struct nfsd4_layout_ops *ops = | ||
3988 | nfsd4_layout_ops[gdev->gd_layout_type]; | ||
3989 | u32 starting_len = xdr->buf->len, needed_len; | ||
3990 | __be32 *p; | ||
3991 | |||
3992 | dprintk("%s: err %d\n", __func__, nfserr); | ||
3993 | if (nfserr) | ||
3994 | goto out; | ||
3995 | |||
3996 | nfserr = nfserr_resource; | ||
3997 | p = xdr_reserve_space(xdr, 4); | ||
3998 | if (!p) | ||
3999 | goto out; | ||
4000 | |||
4001 | *p++ = cpu_to_be32(gdev->gd_layout_type); | ||
4002 | |||
4003 | /* If maxcount is 0 then just update notifications */ | ||
4004 | if (gdev->gd_maxcount != 0) { | ||
4005 | nfserr = ops->encode_getdeviceinfo(xdr, gdev); | ||
4006 | if (nfserr) { | ||
4007 | /* | ||
4008 | * We don't bother to burden the layout drivers with | ||
4009 | * enforcing gd_maxcount, just tell the client to | ||
4010 | * come back with a bigger buffer if it's not enough. | ||
4011 | */ | ||
4012 | if (xdr->buf->len + 4 > gdev->gd_maxcount) | ||
4013 | goto toosmall; | ||
4014 | goto out; | ||
4015 | } | ||
4016 | } | ||
4017 | |||
4018 | nfserr = nfserr_resource; | ||
4019 | if (gdev->gd_notify_types) { | ||
4020 | p = xdr_reserve_space(xdr, 4 + 4); | ||
4021 | if (!p) | ||
4022 | goto out; | ||
4023 | *p++ = cpu_to_be32(1); /* bitmap length */ | ||
4024 | *p++ = cpu_to_be32(gdev->gd_notify_types); | ||
4025 | } else { | ||
4026 | p = xdr_reserve_space(xdr, 4); | ||
4027 | if (!p) | ||
4028 | goto out; | ||
4029 | *p++ = 0; | ||
4030 | } | ||
4031 | |||
4032 | nfserr = 0; | ||
4033 | out: | ||
4034 | kfree(gdev->gd_device); | ||
4035 | dprintk("%s: done: %d\n", __func__, be32_to_cpu(nfserr)); | ||
4036 | return nfserr; | ||
4037 | |||
4038 | toosmall: | ||
4039 | dprintk("%s: maxcount too small\n", __func__); | ||
4040 | needed_len = xdr->buf->len + 4 /* notifications */; | ||
4041 | xdr_truncate_encode(xdr, starting_len); | ||
4042 | p = xdr_reserve_space(xdr, 4); | ||
4043 | if (!p) { | ||
4044 | nfserr = nfserr_resource; | ||
4045 | } else { | ||
4046 | *p++ = cpu_to_be32(needed_len); | ||
4047 | nfserr = nfserr_toosmall; | ||
4048 | } | ||
4049 | goto out; | ||
4050 | } | ||
4051 | |||
4052 | static __be32 | ||
4053 | nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, | ||
4054 | struct nfsd4_layoutget *lgp) | ||
4055 | { | ||
4056 | struct xdr_stream *xdr = &resp->xdr; | ||
4057 | const struct nfsd4_layout_ops *ops = | ||
4058 | nfsd4_layout_ops[lgp->lg_layout_type]; | ||
4059 | __be32 *p; | ||
4060 | |||
4061 | dprintk("%s: err %d\n", __func__, nfserr); | ||
4062 | if (nfserr) | ||
4063 | goto out; | ||
4064 | |||
4065 | nfserr = nfserr_resource; | ||
4066 | p = xdr_reserve_space(xdr, 36 + sizeof(stateid_opaque_t)); | ||
4067 | if (!p) | ||
4068 | goto out; | ||
4069 | |||
4070 | *p++ = cpu_to_be32(1); /* we always set return-on-close */ | ||
4071 | *p++ = cpu_to_be32(lgp->lg_sid.si_generation); | ||
4072 | p = xdr_encode_opaque_fixed(p, &lgp->lg_sid.si_opaque, | ||
4073 | sizeof(stateid_opaque_t)); | ||
4074 | |||
4075 | *p++ = cpu_to_be32(1); /* we always return a single layout */ | ||
4076 | p = xdr_encode_hyper(p, lgp->lg_seg.offset); | ||
4077 | p = xdr_encode_hyper(p, lgp->lg_seg.length); | ||
4078 | *p++ = cpu_to_be32(lgp->lg_seg.iomode); | ||
4079 | *p++ = cpu_to_be32(lgp->lg_layout_type); | ||
4080 | |||
4081 | nfserr = ops->encode_layoutget(xdr, lgp); | ||
4082 | out: | ||
4083 | kfree(lgp->lg_content); | ||
4084 | return nfserr; | ||
4085 | } | ||
4086 | |||
4087 | static __be32 | ||
4088 | nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, | ||
4089 | struct nfsd4_layoutcommit *lcp) | ||
4090 | { | ||
4091 | struct xdr_stream *xdr = &resp->xdr; | ||
4092 | __be32 *p; | ||
4093 | |||
4094 | if (nfserr) | ||
4095 | return nfserr; | ||
4096 | |||
4097 | p = xdr_reserve_space(xdr, 4); | ||
4098 | if (!p) | ||
4099 | return nfserr_resource; | ||
4100 | *p++ = cpu_to_be32(lcp->lc_size_chg); | ||
4101 | if (lcp->lc_size_chg) { | ||
4102 | p = xdr_reserve_space(xdr, 8); | ||
4103 | if (!p) | ||
4104 | return nfserr_resource; | ||
4105 | p = xdr_encode_hyper(p, lcp->lc_newsize); | ||
4106 | } | ||
4107 | |||
4108 | return nfs_ok; | ||
4109 | } | ||
4110 | |||
4111 | static __be32 | ||
4112 | nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, | ||
4113 | struct nfsd4_layoutreturn *lrp) | ||
4114 | { | ||
4115 | struct xdr_stream *xdr = &resp->xdr; | ||
4116 | __be32 *p; | ||
4117 | |||
4118 | if (nfserr) | ||
4119 | return nfserr; | ||
4120 | |||
4121 | p = xdr_reserve_space(xdr, 4); | ||
4122 | if (!p) | ||
4123 | return nfserr_resource; | ||
4124 | *p++ = cpu_to_be32(lrp->lrs_present); | ||
4125 | if (lrp->lrs_present) | ||
4126 | nfsd4_encode_stateid(xdr, &lrp->lr_sid); | ||
4127 | return nfs_ok; | ||
4128 | } | ||
4129 | #endif /* CONFIG_NFSD_PNFS */ | ||
4130 | |||
3817 | static __be32 | 4131 | static __be32 |
3818 | nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, | 4132 | nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, |
3819 | struct nfsd4_seek *seek) | 4133 | struct nfsd4_seek *seek) |
@@ -3890,11 +4204,19 @@ static nfsd4_enc nfsd4_enc_ops[] = { | |||
3890 | [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, | 4204 | [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, |
3891 | [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, | 4205 | [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, |
3892 | [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, | 4206 | [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, |
4207 | #ifdef CONFIG_NFSD_PNFS | ||
4208 | [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo, | ||
4209 | [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, | ||
4210 | [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit, | ||
4211 | [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget, | ||
4212 | [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn, | ||
4213 | #else | ||
3893 | [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, | 4214 | [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, |
3894 | [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, | 4215 | [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, |
3895 | [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, | 4216 | [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, |
3896 | [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, | 4217 | [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, |
3897 | [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, | 4218 | [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, |
4219 | #endif | ||
3898 | [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, | 4220 | [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, |
3899 | [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, | 4221 | [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, |
3900 | [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, | 4222 | [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 19ace74d35f6..aa47d75ddb26 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include "cache.h" | 21 | #include "cache.h" |
22 | #include "state.h" | 22 | #include "state.h" |
23 | #include "netns.h" | 23 | #include "netns.h" |
24 | #include "pnfs.h" | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * We have a single directory with several nodes in it. | 27 | * We have a single directory with several nodes in it. |
@@ -1258,9 +1259,12 @@ static int __init init_nfsd(void) | |||
1258 | retval = nfsd4_init_slabs(); | 1259 | retval = nfsd4_init_slabs(); |
1259 | if (retval) | 1260 | if (retval) |
1260 | goto out_unregister_pernet; | 1261 | goto out_unregister_pernet; |
1261 | retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ | 1262 | retval = nfsd4_init_pnfs(); |
1262 | if (retval) | 1263 | if (retval) |
1263 | goto out_free_slabs; | 1264 | goto out_free_slabs; |
1265 | retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ | ||
1266 | if (retval) | ||
1267 | goto out_exit_pnfs; | ||
1264 | nfsd_stat_init(); /* Statistics */ | 1268 | nfsd_stat_init(); /* Statistics */ |
1265 | retval = nfsd_reply_cache_init(); | 1269 | retval = nfsd_reply_cache_init(); |
1266 | if (retval) | 1270 | if (retval) |
@@ -1282,6 +1286,8 @@ out_free_lockd: | |||
1282 | out_free_stat: | 1286 | out_free_stat: |
1283 | nfsd_stat_shutdown(); | 1287 | nfsd_stat_shutdown(); |
1284 | nfsd_fault_inject_cleanup(); | 1288 | nfsd_fault_inject_cleanup(); |
1289 | out_exit_pnfs: | ||
1290 | nfsd4_exit_pnfs(); | ||
1285 | out_free_slabs: | 1291 | out_free_slabs: |
1286 | nfsd4_free_slabs(); | 1292 | nfsd4_free_slabs(); |
1287 | out_unregister_pernet: | 1293 | out_unregister_pernet: |
@@ -1299,6 +1305,7 @@ static void __exit exit_nfsd(void) | |||
1299 | nfsd_stat_shutdown(); | 1305 | nfsd_stat_shutdown(); |
1300 | nfsd_lockd_shutdown(); | 1306 | nfsd_lockd_shutdown(); |
1301 | nfsd4_free_slabs(); | 1307 | nfsd4_free_slabs(); |
1308 | nfsd4_exit_pnfs(); | ||
1302 | nfsd_fault_inject_cleanup(); | 1309 | nfsd_fault_inject_cleanup(); |
1303 | unregister_filesystem(&nfsd_fs_type); | 1310 | unregister_filesystem(&nfsd_fs_type); |
1304 | unregister_pernet_subsys(&nfsd_net_ops); | 1311 | unregister_pernet_subsys(&nfsd_net_ops); |
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 33a46a8dfaf7..565c4da1a9eb 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h | |||
@@ -325,15 +325,27 @@ void nfsd_lockd_shutdown(void); | |||
325 | 325 | ||
326 | #define NFSD4_SUPPORTED_ATTRS_WORD2 0 | 326 | #define NFSD4_SUPPORTED_ATTRS_WORD2 0 |
327 | 327 | ||
328 | /* 4.1 */ | ||
329 | #ifdef CONFIG_NFSD_PNFS | ||
330 | #define PNFSD_SUPPORTED_ATTRS_WORD1 FATTR4_WORD1_FS_LAYOUT_TYPES | ||
331 | #define PNFSD_SUPPORTED_ATTRS_WORD2 \ | ||
332 | (FATTR4_WORD2_LAYOUT_BLKSIZE | FATTR4_WORD2_LAYOUT_TYPES) | ||
333 | #else | ||
334 | #define PNFSD_SUPPORTED_ATTRS_WORD1 0 | ||
335 | #define PNFSD_SUPPORTED_ATTRS_WORD2 0 | ||
336 | #endif /* CONFIG_NFSD_PNFS */ | ||
337 | |||
328 | #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \ | 338 | #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \ |
329 | NFSD4_SUPPORTED_ATTRS_WORD0 | 339 | NFSD4_SUPPORTED_ATTRS_WORD0 |
330 | 340 | ||
331 | #define NFSD4_1_SUPPORTED_ATTRS_WORD1 \ | 341 | #define NFSD4_1_SUPPORTED_ATTRS_WORD1 \ |
332 | NFSD4_SUPPORTED_ATTRS_WORD1 | 342 | (NFSD4_SUPPORTED_ATTRS_WORD1 | PNFSD_SUPPORTED_ATTRS_WORD1) |
333 | 343 | ||
334 | #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ | 344 | #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ |
335 | (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) | 345 | (NFSD4_SUPPORTED_ATTRS_WORD2 | PNFSD_SUPPORTED_ATTRS_WORD2 | \ |
346 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) | ||
336 | 347 | ||
348 | /* 4.2 */ | ||
337 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | 349 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL |
338 | #define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL | 350 | #define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL |
339 | #else | 351 | #else |
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 08236d70c667..f22920442172 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h | |||
@@ -187,6 +187,24 @@ fh_init(struct svc_fh *fhp, int maxsize) | |||
187 | return fhp; | 187 | return fhp; |
188 | } | 188 | } |
189 | 189 | ||
190 | static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) | ||
191 | { | ||
192 | if (fh1->fh_size != fh2->fh_size) | ||
193 | return false; | ||
194 | if (memcmp(fh1->fh_base.fh_pad, fh2->fh_base.fh_pad, fh1->fh_size) != 0) | ||
195 | return false; | ||
196 | return true; | ||
197 | } | ||
198 | |||
199 | static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) | ||
200 | { | ||
201 | if (fh1->fh_fsid_type != fh2->fh_fsid_type) | ||
202 | return false; | ||
203 | if (memcmp(fh1->fh_fsid, fh2->fh_fsid, key_len(fh1->fh_fsid_type)) != 0) | ||
204 | return false; | ||
205 | return true; | ||
206 | } | ||
207 | |||
190 | #ifdef CONFIG_NFSD_V3 | 208 | #ifdef CONFIG_NFSD_V3 |
191 | /* | 209 | /* |
192 | * The wcc data stored in current_fh should be cleared | 210 | * The wcc data stored in current_fh should be cleared |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 314f5c8f8f1a..9277cc91c21b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -119,6 +119,7 @@ struct svc_program nfsd_program = { | |||
119 | static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { | 119 | static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { |
120 | [0] = 1, | 120 | [0] = 1, |
121 | [1] = 1, | 121 | [1] = 1, |
122 | [2] = 1, | ||
122 | }; | 123 | }; |
123 | 124 | ||
124 | int nfsd_vers(int vers, enum vers_op change) | 125 | int nfsd_vers(int vers, enum vers_op change) |
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h new file mode 100644 index 000000000000..d4c4453674c6 --- /dev/null +++ b/fs/nfsd/pnfs.h | |||
@@ -0,0 +1,86 @@ | |||
1 | #ifndef _FS_NFSD_PNFS_H | ||
2 | #define _FS_NFSD_PNFS_H 1 | ||
3 | |||
4 | #ifdef CONFIG_NFSD_V4 | ||
5 | #include <linux/exportfs.h> | ||
6 | #include <linux/nfsd/export.h> | ||
7 | |||
8 | #include "state.h" | ||
9 | #include "xdr4.h" | ||
10 | |||
11 | struct xdr_stream; | ||
12 | |||
13 | struct nfsd4_deviceid_map { | ||
14 | struct list_head hash; | ||
15 | u64 idx; | ||
16 | int fsid_type; | ||
17 | u32 fsid[]; | ||
18 | }; | ||
19 | |||
20 | struct nfsd4_layout_ops { | ||
21 | u32 notify_types; | ||
22 | |||
23 | __be32 (*proc_getdeviceinfo)(struct super_block *sb, | ||
24 | struct nfsd4_getdeviceinfo *gdevp); | ||
25 | __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr, | ||
26 | struct nfsd4_getdeviceinfo *gdevp); | ||
27 | |||
28 | __be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp, | ||
29 | struct nfsd4_layoutget *lgp); | ||
30 | __be32 (*encode_layoutget)(struct xdr_stream *, | ||
31 | struct nfsd4_layoutget *lgp); | ||
32 | |||
33 | __be32 (*proc_layoutcommit)(struct inode *inode, | ||
34 | struct nfsd4_layoutcommit *lcp); | ||
35 | }; | ||
36 | |||
37 | extern const struct nfsd4_layout_ops *nfsd4_layout_ops[]; | ||
38 | extern const struct nfsd4_layout_ops bl_layout_ops; | ||
39 | |||
40 | __be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, | ||
41 | struct nfsd4_compound_state *cstate, stateid_t *stateid, | ||
42 | bool create, u32 layout_type, struct nfs4_layout_stateid **lsp); | ||
43 | __be32 nfsd4_insert_layout(struct nfsd4_layoutget *lgp, | ||
44 | struct nfs4_layout_stateid *ls); | ||
45 | __be32 nfsd4_return_file_layouts(struct svc_rqst *rqstp, | ||
46 | struct nfsd4_compound_state *cstate, | ||
47 | struct nfsd4_layoutreturn *lrp); | ||
48 | __be32 nfsd4_return_client_layouts(struct svc_rqst *rqstp, | ||
49 | struct nfsd4_compound_state *cstate, | ||
50 | struct nfsd4_layoutreturn *lrp); | ||
51 | int nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp, | ||
52 | u32 device_generation); | ||
53 | struct nfsd4_deviceid_map *nfsd4_find_devid_map(int idx); | ||
54 | #endif /* CONFIG_NFSD_V4 */ | ||
55 | |||
56 | #ifdef CONFIG_NFSD_PNFS | ||
57 | void nfsd4_setup_layout_type(struct svc_export *exp); | ||
58 | void nfsd4_return_all_client_layouts(struct nfs4_client *); | ||
59 | void nfsd4_return_all_file_layouts(struct nfs4_client *clp, | ||
60 | struct nfs4_file *fp); | ||
61 | int nfsd4_init_pnfs(void); | ||
62 | void nfsd4_exit_pnfs(void); | ||
63 | #else | ||
64 | struct nfs4_client; | ||
65 | struct nfs4_file; | ||
66 | |||
67 | static inline void nfsd4_setup_layout_type(struct svc_export *exp) | ||
68 | { | ||
69 | } | ||
70 | |||
71 | static inline void nfsd4_return_all_client_layouts(struct nfs4_client *clp) | ||
72 | { | ||
73 | } | ||
74 | static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp, | ||
75 | struct nfs4_file *fp) | ||
76 | { | ||
77 | } | ||
78 | static inline void nfsd4_exit_pnfs(void) | ||
79 | { | ||
80 | } | ||
81 | static inline int nfsd4_init_pnfs(void) | ||
82 | { | ||
83 | return 0; | ||
84 | } | ||
85 | #endif /* CONFIG_NFSD_PNFS */ | ||
86 | #endif /* _FS_NFSD_PNFS_H */ | ||
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 9d3be371240a..4f3bfeb11766 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -92,6 +92,7 @@ struct nfs4_stid { | |||
92 | /* For a deleg stateid kept around only to process free_stateid's: */ | 92 | /* For a deleg stateid kept around only to process free_stateid's: */ |
93 | #define NFS4_REVOKED_DELEG_STID 16 | 93 | #define NFS4_REVOKED_DELEG_STID 16 |
94 | #define NFS4_CLOSED_DELEG_STID 32 | 94 | #define NFS4_CLOSED_DELEG_STID 32 |
95 | #define NFS4_LAYOUT_STID 64 | ||
95 | unsigned char sc_type; | 96 | unsigned char sc_type; |
96 | stateid_t sc_stateid; | 97 | stateid_t sc_stateid; |
97 | struct nfs4_client *sc_client; | 98 | struct nfs4_client *sc_client; |
@@ -297,6 +298,9 @@ struct nfs4_client { | |||
297 | struct list_head cl_delegations; | 298 | struct list_head cl_delegations; |
298 | struct list_head cl_revoked; /* unacknowledged, revoked 4.1 state */ | 299 | struct list_head cl_revoked; /* unacknowledged, revoked 4.1 state */ |
299 | struct list_head cl_lru; /* tail queue */ | 300 | struct list_head cl_lru; /* tail queue */ |
301 | #ifdef CONFIG_NFSD_PNFS | ||
302 | struct list_head cl_lo_states; /* outstanding layout states */ | ||
303 | #endif | ||
300 | struct xdr_netobj cl_name; /* id generated by client */ | 304 | struct xdr_netobj cl_name; /* id generated by client */ |
301 | nfs4_verifier cl_verifier; /* generated by client */ | 305 | nfs4_verifier cl_verifier; /* generated by client */ |
302 | time_t cl_time; /* time of last lease renewal */ | 306 | time_t cl_time; /* time of last lease renewal */ |
@@ -493,9 +497,13 @@ struct nfs4_file { | |||
493 | atomic_t fi_access[2]; | 497 | atomic_t fi_access[2]; |
494 | u32 fi_share_deny; | 498 | u32 fi_share_deny; |
495 | struct file *fi_deleg_file; | 499 | struct file *fi_deleg_file; |
496 | atomic_t fi_delegees; | 500 | int fi_delegees; |
497 | struct knfsd_fh fi_fhandle; | 501 | struct knfsd_fh fi_fhandle; |
498 | bool fi_had_conflict; | 502 | bool fi_had_conflict; |
503 | #ifdef CONFIG_NFSD_PNFS | ||
504 | struct list_head fi_lo_states; | ||
505 | atomic_t fi_lo_recalls; | ||
506 | #endif | ||
499 | }; | 507 | }; |
500 | 508 | ||
501 | /* | 509 | /* |
@@ -528,6 +536,24 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) | |||
528 | return container_of(s, struct nfs4_ol_stateid, st_stid); | 536 | return container_of(s, struct nfs4_ol_stateid, st_stid); |
529 | } | 537 | } |
530 | 538 | ||
539 | struct nfs4_layout_stateid { | ||
540 | struct nfs4_stid ls_stid; | ||
541 | struct list_head ls_perclnt; | ||
542 | struct list_head ls_perfile; | ||
543 | spinlock_t ls_lock; | ||
544 | struct list_head ls_layouts; | ||
545 | u32 ls_layout_type; | ||
546 | struct file *ls_file; | ||
547 | struct nfsd4_callback ls_recall; | ||
548 | stateid_t ls_recall_sid; | ||
549 | bool ls_recalled; | ||
550 | }; | ||
551 | |||
552 | static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s) | ||
553 | { | ||
554 | return container_of(s, struct nfs4_layout_stateid, ls_stid); | ||
555 | } | ||
556 | |||
531 | /* flags for preprocess_seqid_op() */ | 557 | /* flags for preprocess_seqid_op() */ |
532 | #define RD_STATE 0x00000010 | 558 | #define RD_STATE 0x00000010 |
533 | #define WR_STATE 0x00000020 | 559 | #define WR_STATE 0x00000020 |
@@ -535,6 +561,7 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) | |||
535 | enum nfsd4_cb_op { | 561 | enum nfsd4_cb_op { |
536 | NFSPROC4_CLNT_CB_NULL = 0, | 562 | NFSPROC4_CLNT_CB_NULL = 0, |
537 | NFSPROC4_CLNT_CB_RECALL, | 563 | NFSPROC4_CLNT_CB_RECALL, |
564 | NFSPROC4_CLNT_CB_LAYOUT, | ||
538 | NFSPROC4_CLNT_CB_SEQUENCE, | 565 | NFSPROC4_CLNT_CB_SEQUENCE, |
539 | }; | 566 | }; |
540 | 567 | ||
@@ -545,6 +572,12 @@ struct nfsd_net; | |||
545 | extern __be32 nfs4_preprocess_stateid_op(struct net *net, | 572 | extern __be32 nfs4_preprocess_stateid_op(struct net *net, |
546 | struct nfsd4_compound_state *cstate, | 573 | struct nfsd4_compound_state *cstate, |
547 | stateid_t *stateid, int flags, struct file **filp); | 574 | stateid_t *stateid, int flags, struct file **filp); |
575 | __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, | ||
576 | stateid_t *stateid, unsigned char typemask, | ||
577 | struct nfs4_stid **s, struct nfsd_net *nn); | ||
578 | struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, | ||
579 | struct kmem_cache *slab); | ||
580 | void nfs4_unhash_stid(struct nfs4_stid *s); | ||
548 | void nfs4_put_stid(struct nfs4_stid *s); | 581 | void nfs4_put_stid(struct nfs4_stid *s); |
549 | void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); | 582 | void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); |
550 | extern void nfs4_release_reclaim(struct nfsd_net *); | 583 | extern void nfs4_release_reclaim(struct nfsd_net *); |
@@ -567,6 +600,14 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, | |||
567 | struct nfsd_net *nn); | 600 | struct nfsd_net *nn); |
568 | extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); | 601 | extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); |
569 | 602 | ||
603 | struct nfs4_file *find_file(struct knfsd_fh *fh); | ||
604 | void put_nfs4_file(struct nfs4_file *fi); | ||
605 | static inline void get_nfs4_file(struct nfs4_file *fi) | ||
606 | { | ||
607 | atomic_inc(&fi->fi_ref); | ||
608 | } | ||
609 | struct file *find_any_file(struct nfs4_file *f); | ||
610 | |||
570 | /* grace period management */ | 611 | /* grace period management */ |
571 | void nfsd4_end_grace(struct nfsd_net *nn); | 612 | void nfsd4_end_grace(struct nfsd_net *nn); |
572 | 613 | ||
diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c new file mode 100644 index 000000000000..82f89070594c --- /dev/null +++ b/fs/nfsd/trace.c | |||
@@ -0,0 +1,5 @@ | |||
1 | |||
2 | #include "state.h" | ||
3 | |||
4 | #define CREATE_TRACE_POINTS | ||
5 | #include "trace.h" | ||
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h new file mode 100644 index 000000000000..c668520c344b --- /dev/null +++ b/fs/nfsd/trace.h | |||
@@ -0,0 +1,54 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014 Christoph Hellwig. | ||
3 | */ | ||
4 | #undef TRACE_SYSTEM | ||
5 | #define TRACE_SYSTEM nfsd | ||
6 | |||
7 | #if !defined(_NFSD_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) | ||
8 | #define _NFSD_TRACE_H | ||
9 | |||
10 | #include <linux/tracepoint.h> | ||
11 | |||
12 | DECLARE_EVENT_CLASS(nfsd_stateid_class, | ||
13 | TP_PROTO(stateid_t *stp), | ||
14 | TP_ARGS(stp), | ||
15 | TP_STRUCT__entry( | ||
16 | __field(u32, cl_boot) | ||
17 | __field(u32, cl_id) | ||
18 | __field(u32, si_id) | ||
19 | __field(u32, si_generation) | ||
20 | ), | ||
21 | TP_fast_assign( | ||
22 | __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; | ||
23 | __entry->cl_id = stp->si_opaque.so_clid.cl_id; | ||
24 | __entry->si_id = stp->si_opaque.so_id; | ||
25 | __entry->si_generation = stp->si_generation; | ||
26 | ), | ||
27 | TP_printk("client %08x:%08x stateid %08x:%08x", | ||
28 | __entry->cl_boot, | ||
29 | __entry->cl_id, | ||
30 | __entry->si_id, | ||
31 | __entry->si_generation) | ||
32 | ) | ||
33 | |||
34 | #define DEFINE_STATEID_EVENT(name) \ | ||
35 | DEFINE_EVENT(nfsd_stateid_class, name, \ | ||
36 | TP_PROTO(stateid_t *stp), \ | ||
37 | TP_ARGS(stp)) | ||
38 | DEFINE_STATEID_EVENT(layoutstate_alloc); | ||
39 | DEFINE_STATEID_EVENT(layoutstate_unhash); | ||
40 | DEFINE_STATEID_EVENT(layoutstate_free); | ||
41 | DEFINE_STATEID_EVENT(layout_get_lookup_fail); | ||
42 | DEFINE_STATEID_EVENT(layout_commit_lookup_fail); | ||
43 | DEFINE_STATEID_EVENT(layout_return_lookup_fail); | ||
44 | DEFINE_STATEID_EVENT(layout_recall); | ||
45 | DEFINE_STATEID_EVENT(layout_recall_done); | ||
46 | DEFINE_STATEID_EVENT(layout_recall_fail); | ||
47 | DEFINE_STATEID_EVENT(layout_recall_release); | ||
48 | |||
49 | #endif /* _NFSD_TRACE_H */ | ||
50 | |||
51 | #undef TRACE_INCLUDE_PATH | ||
52 | #define TRACE_INCLUDE_PATH . | ||
53 | #define TRACE_INCLUDE_FILE trace | ||
54 | #include <trace/define_trace.h> | ||
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 90a5925bd6ab..0bda93e58e1b 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h | |||
@@ -428,6 +428,61 @@ struct nfsd4_reclaim_complete { | |||
428 | u32 rca_one_fs; | 428 | u32 rca_one_fs; |
429 | }; | 429 | }; |
430 | 430 | ||
431 | struct nfsd4_deviceid { | ||
432 | u64 fsid_idx; | ||
433 | u32 generation; | ||
434 | u32 pad; | ||
435 | }; | ||
436 | |||
437 | struct nfsd4_layout_seg { | ||
438 | u32 iomode; | ||
439 | u64 offset; | ||
440 | u64 length; | ||
441 | }; | ||
442 | |||
443 | struct nfsd4_getdeviceinfo { | ||
444 | struct nfsd4_deviceid gd_devid; /* request */ | ||
445 | u32 gd_layout_type; /* request */ | ||
446 | u32 gd_maxcount; /* request */ | ||
447 | u32 gd_notify_types;/* request - response */ | ||
448 | void *gd_device; /* response */ | ||
449 | }; | ||
450 | |||
451 | struct nfsd4_layoutget { | ||
452 | u64 lg_minlength; /* request */ | ||
453 | u32 lg_signal; /* request */ | ||
454 | u32 lg_layout_type; /* request */ | ||
455 | u32 lg_maxcount; /* request */ | ||
456 | stateid_t lg_sid; /* request/response */ | ||
457 | struct nfsd4_layout_seg lg_seg; /* request/response */ | ||
458 | void *lg_content; /* response */ | ||
459 | }; | ||
460 | |||
461 | struct nfsd4_layoutcommit { | ||
462 | stateid_t lc_sid; /* request */ | ||
463 | struct nfsd4_layout_seg lc_seg; /* request */ | ||
464 | u32 lc_reclaim; /* request */ | ||
465 | u32 lc_newoffset; /* request */ | ||
466 | u64 lc_last_wr; /* request */ | ||
467 | struct timespec lc_mtime; /* request */ | ||
468 | u32 lc_layout_type; /* request */ | ||
469 | u32 lc_up_len; /* layout length */ | ||
470 | void *lc_up_layout; /* decoded by callback */ | ||
471 | u32 lc_size_chg; /* boolean for response */ | ||
472 | u64 lc_newsize; /* response */ | ||
473 | }; | ||
474 | |||
475 | struct nfsd4_layoutreturn { | ||
476 | u32 lr_return_type; /* request */ | ||
477 | u32 lr_layout_type; /* request */ | ||
478 | struct nfsd4_layout_seg lr_seg; /* request */ | ||
479 | u32 lr_reclaim; /* request */ | ||
480 | u32 lrf_body_len; /* request */ | ||
481 | void *lrf_body; /* request */ | ||
482 | stateid_t lr_sid; /* request/response */ | ||
483 | u32 lrs_present; /* response */ | ||
484 | }; | ||
485 | |||
431 | struct nfsd4_fallocate { | 486 | struct nfsd4_fallocate { |
432 | /* request */ | 487 | /* request */ |
433 | stateid_t falloc_stateid; | 488 | stateid_t falloc_stateid; |
@@ -491,6 +546,10 @@ struct nfsd4_op { | |||
491 | struct nfsd4_reclaim_complete reclaim_complete; | 546 | struct nfsd4_reclaim_complete reclaim_complete; |
492 | struct nfsd4_test_stateid test_stateid; | 547 | struct nfsd4_test_stateid test_stateid; |
493 | struct nfsd4_free_stateid free_stateid; | 548 | struct nfsd4_free_stateid free_stateid; |
549 | struct nfsd4_getdeviceinfo getdeviceinfo; | ||
550 | struct nfsd4_layoutget layoutget; | ||
551 | struct nfsd4_layoutcommit layoutcommit; | ||
552 | struct nfsd4_layoutreturn layoutreturn; | ||
494 | 553 | ||
495 | /* NFSv4.2 */ | 554 | /* NFSv4.2 */ |
496 | struct nfsd4_fallocate allocate; | 555 | struct nfsd4_fallocate allocate; |
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index c5c55dfb91a9..c47f6fdb111a 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h | |||
@@ -21,3 +21,10 @@ | |||
21 | #define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ | 21 | #define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ |
22 | cb_sequence_dec_sz + \ | 22 | cb_sequence_dec_sz + \ |
23 | op_dec_sz) | 23 | op_dec_sz) |
24 | #define NFS4_enc_cb_layout_sz (cb_compound_enc_hdr_sz + \ | ||
25 | cb_sequence_enc_sz + \ | ||
26 | 1 + 3 + \ | ||
27 | enc_nfs4_fh_sz + 4) | ||
28 | #define NFS4_dec_cb_layout_sz (cb_compound_dec_hdr_sz + \ | ||
29 | cb_sequence_dec_sz + \ | ||
30 | op_dec_sz) | ||
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 3a03e0aea1fb..a8c728acb7a8 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c | |||
@@ -128,7 +128,6 @@ static const struct vm_operations_struct nilfs_file_vm_ops = { | |||
128 | .fault = filemap_fault, | 128 | .fault = filemap_fault, |
129 | .map_pages = filemap_map_pages, | 129 | .map_pages = filemap_map_pages, |
130 | .page_mkwrite = nilfs_page_mkwrite, | 130 | .page_mkwrite = nilfs_page_mkwrite, |
131 | .remap_pages = generic_file_remap_pages, | ||
132 | }; | 131 | }; |
133 | 132 | ||
134 | static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) | 133 | static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) |
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 57ceaf33d177..748ca238915a 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c | |||
@@ -172,7 +172,6 @@ int nilfs_init_gcinode(struct inode *inode) | |||
172 | inode->i_mode = S_IFREG; | 172 | inode->i_mode = S_IFREG; |
173 | mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); | 173 | mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); |
174 | inode->i_mapping->a_ops = &empty_aops; | 174 | inode->i_mapping->a_ops = &empty_aops; |
175 | inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; | ||
176 | 175 | ||
177 | ii->i_flags = 0; | 176 | ii->i_flags = 0; |
178 | nilfs_bmap_init_gc(ii->i_bmap); | 177 | nilfs_bmap_init_gc(ii->i_bmap); |
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index c4dcd1db57ee..892cf5ffdb8e 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c | |||
@@ -429,7 +429,6 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) | |||
429 | 429 | ||
430 | inode->i_mode = S_IFREG; | 430 | inode->i_mode = S_IFREG; |
431 | mapping_set_gfp_mask(inode->i_mapping, gfp_mask); | 431 | mapping_set_gfp_mask(inode->i_mapping, gfp_mask); |
432 | inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; | ||
433 | 432 | ||
434 | inode->i_op = &def_mdt_iops; | 433 | inode->i_op = &def_mdt_iops; |
435 | inode->i_fop = &def_mdt_fops; | 434 | inode->i_fop = &def_mdt_fops; |
@@ -457,13 +456,12 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode, | |||
457 | struct nilfs_shadow_map *shadow) | 456 | struct nilfs_shadow_map *shadow) |
458 | { | 457 | { |
459 | struct nilfs_mdt_info *mi = NILFS_MDT(inode); | 458 | struct nilfs_mdt_info *mi = NILFS_MDT(inode); |
460 | struct backing_dev_info *bdi = inode->i_sb->s_bdi; | ||
461 | 459 | ||
462 | INIT_LIST_HEAD(&shadow->frozen_buffers); | 460 | INIT_LIST_HEAD(&shadow->frozen_buffers); |
463 | address_space_init_once(&shadow->frozen_data); | 461 | address_space_init_once(&shadow->frozen_data); |
464 | nilfs_mapping_init(&shadow->frozen_data, inode, bdi); | 462 | nilfs_mapping_init(&shadow->frozen_data, inode); |
465 | address_space_init_once(&shadow->frozen_btnodes); | 463 | address_space_init_once(&shadow->frozen_btnodes); |
466 | nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi); | 464 | nilfs_mapping_init(&shadow->frozen_btnodes, inode); |
467 | mi->mi_shadow = shadow; | 465 | mi->mi_shadow = shadow; |
468 | return 0; | 466 | return 0; |
469 | } | 467 | } |
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index da276640f776..700ecbcca55d 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c | |||
@@ -461,14 +461,12 @@ unsigned nilfs_page_count_clean_buffers(struct page *page, | |||
461 | return nc; | 461 | return nc; |
462 | } | 462 | } |
463 | 463 | ||
464 | void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, | 464 | void nilfs_mapping_init(struct address_space *mapping, struct inode *inode) |
465 | struct backing_dev_info *bdi) | ||
466 | { | 465 | { |
467 | mapping->host = inode; | 466 | mapping->host = inode; |
468 | mapping->flags = 0; | 467 | mapping->flags = 0; |
469 | mapping_set_gfp_mask(mapping, GFP_NOFS); | 468 | mapping_set_gfp_mask(mapping, GFP_NOFS); |
470 | mapping->private_data = NULL; | 469 | mapping->private_data = NULL; |
471 | mapping->backing_dev_info = bdi; | ||
472 | mapping->a_ops = &empty_aops; | 470 | mapping->a_ops = &empty_aops; |
473 | } | 471 | } |
474 | 472 | ||
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index ef30c5c2426f..a43b8287d012 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h | |||
@@ -57,8 +57,7 @@ int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); | |||
57 | void nilfs_copy_back_pages(struct address_space *, struct address_space *); | 57 | void nilfs_copy_back_pages(struct address_space *, struct address_space *); |
58 | void nilfs_clear_dirty_page(struct page *, bool); | 58 | void nilfs_clear_dirty_page(struct page *, bool); |
59 | void nilfs_clear_dirty_pages(struct address_space *, bool); | 59 | void nilfs_clear_dirty_pages(struct address_space *, bool); |
60 | void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, | 60 | void nilfs_mapping_init(struct address_space *mapping, struct inode *inode); |
61 | struct backing_dev_info *bdi); | ||
62 | unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); | 61 | unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); |
63 | unsigned long nilfs_find_uncommitted_extent(struct inode *inode, | 62 | unsigned long nilfs_find_uncommitted_extent(struct inode *inode, |
64 | sector_t start_blk, | 63 | sector_t start_blk, |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 2e5b3ec85b8f..5bc2a1cf73c3 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -166,7 +166,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) | |||
166 | ii->i_state = 0; | 166 | ii->i_state = 0; |
167 | ii->i_cno = 0; | 167 | ii->i_cno = 0; |
168 | ii->vfs_inode.i_version = 1; | 168 | ii->vfs_inode.i_version = 1; |
169 | nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi); | 169 | nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode); |
170 | return &ii->vfs_inode; | 170 | return &ii->vfs_inode; |
171 | } | 171 | } |
172 | 172 | ||
@@ -1057,7 +1057,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) | |||
1057 | { | 1057 | { |
1058 | struct the_nilfs *nilfs; | 1058 | struct the_nilfs *nilfs; |
1059 | struct nilfs_root *fsroot; | 1059 | struct nilfs_root *fsroot; |
1060 | struct backing_dev_info *bdi; | ||
1061 | __u64 cno; | 1060 | __u64 cno; |
1062 | int err; | 1061 | int err; |
1063 | 1062 | ||
@@ -1077,8 +1076,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) | |||
1077 | sb->s_time_gran = 1; | 1076 | sb->s_time_gran = 1; |
1078 | sb->s_max_links = NILFS_LINK_MAX; | 1077 | sb->s_max_links = NILFS_LINK_MAX; |
1079 | 1078 | ||
1080 | bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; | 1079 | sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info; |
1081 | sb->s_bdi = bdi ? : &default_backing_dev_info; | ||
1082 | 1080 | ||
1083 | err = load_nilfs(nilfs, sb); | 1081 | err = load_nilfs(nilfs, sb); |
1084 | if (err) | 1082 | if (err) |
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig index 22c629eedd82..2a24249b30af 100644 --- a/fs/notify/Kconfig +++ b/fs/notify/Kconfig | |||
@@ -1,5 +1,6 @@ | |||
1 | config FSNOTIFY | 1 | config FSNOTIFY |
2 | def_bool n | 2 | def_bool n |
3 | select SRCU | ||
3 | 4 | ||
4 | source "fs/notify/dnotify/Kconfig" | 5 | source "fs/notify/dnotify/Kconfig" |
5 | source "fs/notify/inotify/Kconfig" | 6 | source "fs/notify/inotify/Kconfig" |
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 30d3addfad75..51ceb8107284 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c | |||
@@ -140,7 +140,7 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, | |||
140 | } | 140 | } |
141 | 141 | ||
142 | if (S_ISDIR(path->dentry->d_inode->i_mode) && | 142 | if (S_ISDIR(path->dentry->d_inode->i_mode) && |
143 | (marks_ignored_mask & FS_ISDIR)) | 143 | !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) |
144 | return false; | 144 | return false; |
145 | 145 | ||
146 | if (event_mask & marks_mask & ~marks_ignored_mask) | 146 | if (event_mask & marks_mask & ~marks_ignored_mask) |
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index bff8567aa42d..cf275500a665 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -487,20 +487,27 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, | |||
487 | unsigned int flags, | 487 | unsigned int flags, |
488 | int *destroy) | 488 | int *destroy) |
489 | { | 489 | { |
490 | __u32 oldmask; | 490 | __u32 oldmask = 0; |
491 | 491 | ||
492 | spin_lock(&fsn_mark->lock); | 492 | spin_lock(&fsn_mark->lock); |
493 | if (!(flags & FAN_MARK_IGNORED_MASK)) { | 493 | if (!(flags & FAN_MARK_IGNORED_MASK)) { |
494 | __u32 tmask = fsn_mark->mask & ~mask; | ||
495 | |||
496 | if (flags & FAN_MARK_ONDIR) | ||
497 | tmask &= ~FAN_ONDIR; | ||
498 | |||
494 | oldmask = fsn_mark->mask; | 499 | oldmask = fsn_mark->mask; |
495 | fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask)); | 500 | fsnotify_set_mark_mask_locked(fsn_mark, tmask); |
496 | } else { | 501 | } else { |
497 | oldmask = fsn_mark->ignored_mask; | 502 | __u32 tmask = fsn_mark->ignored_mask & ~mask; |
498 | fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask & ~mask)); | 503 | if (flags & FAN_MARK_ONDIR) |
504 | tmask &= ~FAN_ONDIR; | ||
505 | |||
506 | fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); | ||
499 | } | 507 | } |
508 | *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask); | ||
500 | spin_unlock(&fsn_mark->lock); | 509 | spin_unlock(&fsn_mark->lock); |
501 | 510 | ||
502 | *destroy = !(oldmask & ~mask); | ||
503 | |||
504 | return mask & oldmask; | 511 | return mask & oldmask; |
505 | } | 512 | } |
506 | 513 | ||
@@ -569,20 +576,22 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, | |||
569 | 576 | ||
570 | spin_lock(&fsn_mark->lock); | 577 | spin_lock(&fsn_mark->lock); |
571 | if (!(flags & FAN_MARK_IGNORED_MASK)) { | 578 | if (!(flags & FAN_MARK_IGNORED_MASK)) { |
579 | __u32 tmask = fsn_mark->mask | mask; | ||
580 | |||
581 | if (flags & FAN_MARK_ONDIR) | ||
582 | tmask |= FAN_ONDIR; | ||
583 | |||
572 | oldmask = fsn_mark->mask; | 584 | oldmask = fsn_mark->mask; |
573 | fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask)); | 585 | fsnotify_set_mark_mask_locked(fsn_mark, tmask); |
574 | } else { | 586 | } else { |
575 | __u32 tmask = fsn_mark->ignored_mask | mask; | 587 | __u32 tmask = fsn_mark->ignored_mask | mask; |
588 | if (flags & FAN_MARK_ONDIR) | ||
589 | tmask |= FAN_ONDIR; | ||
590 | |||
576 | fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); | 591 | fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); |
577 | if (flags & FAN_MARK_IGNORED_SURV_MODIFY) | 592 | if (flags & FAN_MARK_IGNORED_SURV_MODIFY) |
578 | fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; | 593 | fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; |
579 | } | 594 | } |
580 | |||
581 | if (!(flags & FAN_MARK_ONDIR)) { | ||
582 | __u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR; | ||
583 | fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); | ||
584 | } | ||
585 | |||
586 | spin_unlock(&fsn_mark->lock); | 595 | spin_unlock(&fsn_mark->lock); |
587 | 596 | ||
588 | return mask & ~oldmask; | 597 | return mask & ~oldmask; |
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 643faa44f22b..1da9b2d184dc 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
@@ -19,6 +19,7 @@ | |||
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/backing-dev.h> | ||
22 | #include <linux/buffer_head.h> | 23 | #include <linux/buffer_head.h> |
23 | #include <linux/gfp.h> | 24 | #include <linux/gfp.h> |
24 | #include <linux/pagemap.h> | 25 | #include <linux/pagemap.h> |
@@ -2091,7 +2092,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, | |||
2091 | count = iov_length(iov, nr_segs); | 2092 | count = iov_length(iov, nr_segs); |
2092 | pos = *ppos; | 2093 | pos = *ppos; |
2093 | /* We can write back this queue in page reclaim. */ | 2094 | /* We can write back this queue in page reclaim. */ |
2094 | current->backing_dev_info = mapping->backing_dev_info; | 2095 | current->backing_dev_info = inode_to_bdi(inode); |
2095 | written = 0; | 2096 | written = 0; |
2096 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 2097 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
2097 | if (err) | 2098 | if (err) |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 7e8282dcea2a..c58a1bcfda0f 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -245,16 +245,14 @@ int ocfs2_set_acl(handle_t *handle, | |||
245 | ret = posix_acl_equiv_mode(acl, &mode); | 245 | ret = posix_acl_equiv_mode(acl, &mode); |
246 | if (ret < 0) | 246 | if (ret < 0) |
247 | return ret; | 247 | return ret; |
248 | else { | ||
249 | if (ret == 0) | ||
250 | acl = NULL; | ||
251 | 248 | ||
252 | ret = ocfs2_acl_set_mode(inode, di_bh, | 249 | if (ret == 0) |
253 | handle, mode); | 250 | acl = NULL; |
254 | if (ret) | ||
255 | return ret; | ||
256 | 251 | ||
257 | } | 252 | ret = ocfs2_acl_set_mode(inode, di_bh, |
253 | handle, mode); | ||
254 | if (ret) | ||
255 | return ret; | ||
258 | } | 256 | } |
259 | break; | 257 | break; |
260 | case ACL_TYPE_DEFAULT: | 258 | case ACL_TYPE_DEFAULT: |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index fcae9ef1a328..044158bd22be 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -6873,7 +6873,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
6873 | if (IS_ERR(handle)) { | 6873 | if (IS_ERR(handle)) { |
6874 | ret = PTR_ERR(handle); | 6874 | ret = PTR_ERR(handle); |
6875 | mlog_errno(ret); | 6875 | mlog_errno(ret); |
6876 | goto out_unlock; | 6876 | goto out; |
6877 | } | 6877 | } |
6878 | 6878 | ||
6879 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, | 6879 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, |
@@ -6931,7 +6931,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
6931 | if (ret) { | 6931 | if (ret) { |
6932 | mlog_errno(ret); | 6932 | mlog_errno(ret); |
6933 | need_free = 1; | 6933 | need_free = 1; |
6934 | goto out_commit; | 6934 | goto out_unlock; |
6935 | } | 6935 | } |
6936 | 6936 | ||
6937 | page_end = PAGE_CACHE_SIZE; | 6937 | page_end = PAGE_CACHE_SIZE; |
@@ -6964,12 +6964,16 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
6964 | if (ret) { | 6964 | if (ret) { |
6965 | mlog_errno(ret); | 6965 | mlog_errno(ret); |
6966 | need_free = 1; | 6966 | need_free = 1; |
6967 | goto out_commit; | 6967 | goto out_unlock; |
6968 | } | 6968 | } |
6969 | 6969 | ||
6970 | inode->i_blocks = ocfs2_inode_sector_count(inode); | 6970 | inode->i_blocks = ocfs2_inode_sector_count(inode); |
6971 | } | 6971 | } |
6972 | 6972 | ||
6973 | out_unlock: | ||
6974 | if (pages) | ||
6975 | ocfs2_unlock_and_free_pages(pages, num_pages); | ||
6976 | |||
6973 | out_commit: | 6977 | out_commit: |
6974 | if (ret < 0 && did_quota) | 6978 | if (ret < 0 && did_quota) |
6975 | dquot_free_space_nodirty(inode, | 6979 | dquot_free_space_nodirty(inode, |
@@ -6989,15 +6993,11 @@ out_commit: | |||
6989 | 6993 | ||
6990 | ocfs2_commit_trans(osb, handle); | 6994 | ocfs2_commit_trans(osb, handle); |
6991 | 6995 | ||
6992 | out_unlock: | 6996 | out: |
6993 | if (data_ac) | 6997 | if (data_ac) |
6994 | ocfs2_free_alloc_context(data_ac); | 6998 | ocfs2_free_alloc_context(data_ac); |
6995 | 6999 | if (pages) | |
6996 | out: | ||
6997 | if (pages) { | ||
6998 | ocfs2_unlock_and_free_pages(pages, num_pages); | ||
6999 | kfree(pages); | 7000 | kfree(pages); |
7000 | } | ||
7001 | 7001 | ||
7002 | return ret; | 7002 | return ret; |
7003 | } | 7003 | } |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 46d93e941f3d..44db1808cdb5 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/pipe_fs_i.h> | 28 | #include <linux/pipe_fs_i.h> |
29 | #include <linux/mpage.h> | 29 | #include <linux/mpage.h> |
30 | #include <linux/quotaops.h> | 30 | #include <linux/quotaops.h> |
31 | #include <linux/blkdev.h> | ||
31 | 32 | ||
32 | #include <cluster/masklog.h> | 33 | #include <cluster/masklog.h> |
33 | 34 | ||
@@ -47,6 +48,9 @@ | |||
47 | #include "ocfs2_trace.h" | 48 | #include "ocfs2_trace.h" |
48 | 49 | ||
49 | #include "buffer_head_io.h" | 50 | #include "buffer_head_io.h" |
51 | #include "dir.h" | ||
52 | #include "namei.h" | ||
53 | #include "sysfile.h" | ||
50 | 54 | ||
51 | static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, | 55 | static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, |
52 | struct buffer_head *bh_result, int create) | 56 | struct buffer_head *bh_result, int create) |
@@ -506,18 +510,21 @@ bail: | |||
506 | * | 510 | * |
507 | * called like this: dio->get_blocks(dio->inode, fs_startblk, | 511 | * called like this: dio->get_blocks(dio->inode, fs_startblk, |
508 | * fs_count, map_bh, dio->rw == WRITE); | 512 | * fs_count, map_bh, dio->rw == WRITE); |
509 | * | ||
510 | * Note that we never bother to allocate blocks here, and thus ignore the | ||
511 | * create argument. | ||
512 | */ | 513 | */ |
513 | static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | 514 | static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, |
514 | struct buffer_head *bh_result, int create) | 515 | struct buffer_head *bh_result, int create) |
515 | { | 516 | { |
516 | int ret; | 517 | int ret; |
518 | u32 cpos = 0; | ||
519 | int alloc_locked = 0; | ||
517 | u64 p_blkno, inode_blocks, contig_blocks; | 520 | u64 p_blkno, inode_blocks, contig_blocks; |
518 | unsigned int ext_flags; | 521 | unsigned int ext_flags; |
519 | unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; | 522 | unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; |
520 | unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; | 523 | unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; |
524 | unsigned long len = bh_result->b_size; | ||
525 | unsigned int clusters_to_alloc = 0; | ||
526 | |||
527 | cpos = ocfs2_blocks_to_clusters(inode->i_sb, iblock); | ||
521 | 528 | ||
522 | /* This function won't even be called if the request isn't all | 529 | /* This function won't even be called if the request isn't all |
523 | * nicely aligned and of the right size, so there's no need | 530 | * nicely aligned and of the right size, so there's no need |
@@ -539,6 +546,40 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | |||
539 | /* We should already CoW the refcounted extent in case of create. */ | 546 | /* We should already CoW the refcounted extent in case of create. */ |
540 | BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED)); | 547 | BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED)); |
541 | 548 | ||
549 | /* allocate blocks if no p_blkno is found, and create == 1 */ | ||
550 | if (!p_blkno && create) { | ||
551 | ret = ocfs2_inode_lock(inode, NULL, 1); | ||
552 | if (ret < 0) { | ||
553 | mlog_errno(ret); | ||
554 | goto bail; | ||
555 | } | ||
556 | |||
557 | alloc_locked = 1; | ||
558 | |||
559 | /* fill hole, allocate blocks can't be larger than the size | ||
560 | * of the hole */ | ||
561 | clusters_to_alloc = ocfs2_clusters_for_bytes(inode->i_sb, len); | ||
562 | if (clusters_to_alloc > contig_blocks) | ||
563 | clusters_to_alloc = contig_blocks; | ||
564 | |||
565 | /* allocate extent and insert them into the extent tree */ | ||
566 | ret = ocfs2_extend_allocation(inode, cpos, | ||
567 | clusters_to_alloc, 0); | ||
568 | if (ret < 0) { | ||
569 | mlog_errno(ret); | ||
570 | goto bail; | ||
571 | } | ||
572 | |||
573 | ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, | ||
574 | &contig_blocks, &ext_flags); | ||
575 | if (ret < 0) { | ||
576 | mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n", | ||
577 | (unsigned long long)iblock); | ||
578 | ret = -EIO; | ||
579 | goto bail; | ||
580 | } | ||
581 | } | ||
582 | |||
542 | /* | 583 | /* |
543 | * get_more_blocks() expects us to describe a hole by clearing | 584 | * get_more_blocks() expects us to describe a hole by clearing |
544 | * the mapped bit on bh_result(). | 585 | * the mapped bit on bh_result(). |
@@ -556,6 +597,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | |||
556 | contig_blocks = max_blocks; | 597 | contig_blocks = max_blocks; |
557 | bh_result->b_size = contig_blocks << blocksize_bits; | 598 | bh_result->b_size = contig_blocks << blocksize_bits; |
558 | bail: | 599 | bail: |
600 | if (alloc_locked) | ||
601 | ocfs2_inode_unlock(inode, 1); | ||
559 | return ret; | 602 | return ret; |
560 | } | 603 | } |
561 | 604 | ||
@@ -597,6 +640,184 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait) | |||
597 | return try_to_free_buffers(page); | 640 | return try_to_free_buffers(page); |
598 | } | 641 | } |
599 | 642 | ||
643 | static int ocfs2_is_overwrite(struct ocfs2_super *osb, | ||
644 | struct inode *inode, loff_t offset) | ||
645 | { | ||
646 | int ret = 0; | ||
647 | u32 v_cpos = 0; | ||
648 | u32 p_cpos = 0; | ||
649 | unsigned int num_clusters = 0; | ||
650 | unsigned int ext_flags = 0; | ||
651 | |||
652 | v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); | ||
653 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, | ||
654 | &num_clusters, &ext_flags); | ||
655 | if (ret < 0) { | ||
656 | mlog_errno(ret); | ||
657 | return ret; | ||
658 | } | ||
659 | |||
660 | if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) | ||
661 | return 1; | ||
662 | |||
663 | return 0; | ||
664 | } | ||
665 | |||
666 | static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | ||
667 | struct iov_iter *iter, | ||
668 | loff_t offset) | ||
669 | { | ||
670 | ssize_t ret = 0; | ||
671 | ssize_t written = 0; | ||
672 | bool orphaned = false; | ||
673 | int is_overwrite = 0; | ||
674 | struct file *file = iocb->ki_filp; | ||
675 | struct inode *inode = file_inode(file)->i_mapping->host; | ||
676 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
677 | struct buffer_head *di_bh = NULL; | ||
678 | size_t count = iter->count; | ||
679 | journal_t *journal = osb->journal->j_journal; | ||
680 | u32 zero_len; | ||
681 | int cluster_align; | ||
682 | loff_t final_size = offset + count; | ||
683 | int append_write = offset >= i_size_read(inode) ? 1 : 0; | ||
684 | unsigned int num_clusters = 0; | ||
685 | unsigned int ext_flags = 0; | ||
686 | |||
687 | { | ||
688 | u64 o = offset; | ||
689 | |||
690 | zero_len = do_div(o, 1 << osb->s_clustersize_bits); | ||
691 | cluster_align = !zero_len; | ||
692 | } | ||
693 | |||
694 | /* | ||
695 | * when final_size > inode->i_size, inode->i_size will be | ||
696 | * updated after direct write, so add the inode to orphan | ||
697 | * dir first. | ||
698 | */ | ||
699 | if (final_size > i_size_read(inode)) { | ||
700 | ret = ocfs2_add_inode_to_orphan(osb, inode); | ||
701 | if (ret < 0) { | ||
702 | mlog_errno(ret); | ||
703 | goto out; | ||
704 | } | ||
705 | orphaned = true; | ||
706 | } | ||
707 | |||
708 | if (append_write) { | ||
709 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | ||
710 | if (ret < 0) { | ||
711 | mlog_errno(ret); | ||
712 | goto clean_orphan; | ||
713 | } | ||
714 | |||
715 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | ||
716 | ret = ocfs2_zero_extend(inode, di_bh, offset); | ||
717 | else | ||
718 | ret = ocfs2_extend_no_holes(inode, di_bh, offset, | ||
719 | offset); | ||
720 | if (ret < 0) { | ||
721 | mlog_errno(ret); | ||
722 | ocfs2_inode_unlock(inode, 1); | ||
723 | brelse(di_bh); | ||
724 | goto clean_orphan; | ||
725 | } | ||
726 | |||
727 | is_overwrite = ocfs2_is_overwrite(osb, inode, offset); | ||
728 | if (is_overwrite < 0) { | ||
729 | mlog_errno(is_overwrite); | ||
730 | ocfs2_inode_unlock(inode, 1); | ||
731 | brelse(di_bh); | ||
732 | goto clean_orphan; | ||
733 | } | ||
734 | |||
735 | ocfs2_inode_unlock(inode, 1); | ||
736 | brelse(di_bh); | ||
737 | di_bh = NULL; | ||
738 | } | ||
739 | |||
740 | written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, | ||
741 | iter, offset, | ||
742 | ocfs2_direct_IO_get_blocks, | ||
743 | ocfs2_dio_end_io, NULL, 0); | ||
744 | if (unlikely(written < 0)) { | ||
745 | loff_t i_size = i_size_read(inode); | ||
746 | |||
747 | if (offset + count > i_size) { | ||
748 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | ||
749 | if (ret < 0) { | ||
750 | mlog_errno(ret); | ||
751 | goto clean_orphan; | ||
752 | } | ||
753 | |||
754 | if (i_size == i_size_read(inode)) { | ||
755 | ret = ocfs2_truncate_file(inode, di_bh, | ||
756 | i_size); | ||
757 | if (ret < 0) { | ||
758 | if (ret != -ENOSPC) | ||
759 | mlog_errno(ret); | ||
760 | |||
761 | ocfs2_inode_unlock(inode, 1); | ||
762 | brelse(di_bh); | ||
763 | goto clean_orphan; | ||
764 | } | ||
765 | } | ||
766 | |||
767 | ocfs2_inode_unlock(inode, 1); | ||
768 | brelse(di_bh); | ||
769 | |||
770 | ret = jbd2_journal_force_commit(journal); | ||
771 | if (ret < 0) | ||
772 | mlog_errno(ret); | ||
773 | } | ||
774 | } else if (written < 0 && append_write && !is_overwrite && | ||
775 | !cluster_align) { | ||
776 | u32 p_cpos = 0; | ||
777 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); | ||
778 | |||
779 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, | ||
780 | &num_clusters, &ext_flags); | ||
781 | if (ret < 0) { | ||
782 | mlog_errno(ret); | ||
783 | goto clean_orphan; | ||
784 | } | ||
785 | |||
786 | BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN)); | ||
787 | |||
788 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, | ||
789 | p_cpos << (osb->s_clustersize_bits - 9), | ||
790 | zero_len >> 9, GFP_KERNEL, false); | ||
791 | if (ret < 0) | ||
792 | mlog_errno(ret); | ||
793 | } | ||
794 | |||
795 | clean_orphan: | ||
796 | if (orphaned) { | ||
797 | int tmp_ret; | ||
798 | int update_isize = written > 0 ? 1 : 0; | ||
799 | loff_t end = update_isize ? offset + written : 0; | ||
800 | |||
801 | tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, | ||
802 | update_isize, end); | ||
803 | if (tmp_ret < 0) { | ||
804 | ret = tmp_ret; | ||
805 | goto out; | ||
806 | } | ||
807 | |||
808 | tmp_ret = jbd2_journal_force_commit(journal); | ||
809 | if (tmp_ret < 0) { | ||
810 | ret = tmp_ret; | ||
811 | mlog_errno(tmp_ret); | ||
812 | } | ||
813 | } | ||
814 | |||
815 | out: | ||
816 | if (ret >= 0) | ||
817 | ret = written; | ||
818 | return ret; | ||
819 | } | ||
820 | |||
600 | static ssize_t ocfs2_direct_IO(int rw, | 821 | static ssize_t ocfs2_direct_IO(int rw, |
601 | struct kiocb *iocb, | 822 | struct kiocb *iocb, |
602 | struct iov_iter *iter, | 823 | struct iov_iter *iter, |
@@ -604,6 +825,9 @@ static ssize_t ocfs2_direct_IO(int rw, | |||
604 | { | 825 | { |
605 | struct file *file = iocb->ki_filp; | 826 | struct file *file = iocb->ki_filp; |
606 | struct inode *inode = file_inode(file)->i_mapping->host; | 827 | struct inode *inode = file_inode(file)->i_mapping->host; |
828 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
829 | int full_coherency = !(osb->s_mount_opt & | ||
830 | OCFS2_MOUNT_COHERENCY_BUFFERED); | ||
607 | 831 | ||
608 | /* | 832 | /* |
609 | * Fallback to buffered I/O if we see an inode without | 833 | * Fallback to buffered I/O if we see an inode without |
@@ -612,14 +836,20 @@ static ssize_t ocfs2_direct_IO(int rw, | |||
612 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 836 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
613 | return 0; | 837 | return 0; |
614 | 838 | ||
615 | /* Fallback to buffered I/O if we are appending. */ | 839 | /* Fallback to buffered I/O if we are appending and |
616 | if (i_size_read(inode) <= offset) | 840 | * concurrent O_DIRECT writes are allowed. |
841 | */ | ||
842 | if (i_size_read(inode) <= offset && !full_coherency) | ||
617 | return 0; | 843 | return 0; |
618 | 844 | ||
619 | return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, | 845 | if (rw == READ) |
846 | return __blockdev_direct_IO(rw, iocb, inode, | ||
847 | inode->i_sb->s_bdev, | ||
620 | iter, offset, | 848 | iter, offset, |
621 | ocfs2_direct_IO_get_blocks, | 849 | ocfs2_direct_IO_get_blocks, |
622 | ocfs2_dio_end_io, NULL, 0); | 850 | ocfs2_dio_end_io, NULL, 0); |
851 | else | ||
852 | return ocfs2_direct_IO_write(iocb, iter, offset); | ||
623 | } | 853 | } |
624 | 854 | ||
625 | static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, | 855 | static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 2e355e0f8335..56c403a563bc 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -1016,7 +1016,8 @@ void o2net_fill_node_map(unsigned long *map, unsigned bytes) | |||
1016 | 1016 | ||
1017 | memset(map, 0, bytes); | 1017 | memset(map, 0, bytes); |
1018 | for (node = 0; node < O2NM_MAX_NODES; ++node) { | 1018 | for (node = 0; node < O2NM_MAX_NODES; ++node) { |
1019 | o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret); | 1019 | if (!o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret)) |
1020 | continue; | ||
1020 | if (!ret) { | 1021 | if (!ret) { |
1021 | set_bit(node, map); | 1022 | set_bit(node, map); |
1022 | sc_put(sc); | 1023 | sc_put(sc); |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index dc024367110a..b95e7df5b76a 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -107,12 +107,12 @@ struct o2net_node { | |||
107 | struct list_head nn_status_list; | 107 | struct list_head nn_status_list; |
108 | 108 | ||
109 | /* connects are attempted from when heartbeat comes up until either hb | 109 | /* connects are attempted from when heartbeat comes up until either hb |
110 | * goes down, the node is unconfigured, no connect attempts succeed | 110 | * goes down, the node is unconfigured, or a connect succeeds. |
111 | * before O2NET_CONN_IDLE_DELAY, or a connect succeeds. connect_work | 111 | * connect_work is queued from set_nn_state both from hb up and from |
112 | * is queued from set_nn_state both from hb up and from itself if a | 112 | * itself if a connect attempt fails and so can be self-arming. |
113 | * connect attempt fails and so can be self-arming. shutdown is | 113 | * shutdown is careful to first mark the nn such that no connects will |
114 | * careful to first mark the nn such that no connects will be attempted | 114 | * be attempted before canceling delayed connect work and flushing the |
115 | * before canceling delayed connect work and flushing the queue. */ | 115 | * queue. */ |
116 | struct delayed_work nn_connect_work; | 116 | struct delayed_work nn_connect_work; |
117 | unsigned long nn_last_connect_attempt; | 117 | unsigned long nn_last_connect_attempt; |
118 | 118 | ||
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 319e786175af..b08050bd3f2e 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -3456,10 +3456,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, | |||
3456 | int blocksize = dir->i_sb->s_blocksize; | 3456 | int blocksize = dir->i_sb->s_blocksize; |
3457 | 3457 | ||
3458 | status = ocfs2_read_dir_block(dir, 0, &bh, 0); | 3458 | status = ocfs2_read_dir_block(dir, 0, &bh, 0); |
3459 | if (status) { | 3459 | if (status) |
3460 | mlog_errno(status); | ||
3461 | goto bail; | 3460 | goto bail; |
3462 | } | ||
3463 | 3461 | ||
3464 | rec_len = OCFS2_DIR_REC_LEN(namelen); | 3462 | rec_len = OCFS2_DIR_REC_LEN(namelen); |
3465 | offset = 0; | 3463 | offset = 0; |
@@ -3480,10 +3478,9 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, | |||
3480 | status = ocfs2_read_dir_block(dir, | 3478 | status = ocfs2_read_dir_block(dir, |
3481 | offset >> sb->s_blocksize_bits, | 3479 | offset >> sb->s_blocksize_bits, |
3482 | &bh, 0); | 3480 | &bh, 0); |
3483 | if (status) { | 3481 | if (status) |
3484 | mlog_errno(status); | ||
3485 | goto bail; | 3482 | goto bail; |
3486 | } | 3483 | |
3487 | /* move to next block */ | 3484 | /* move to next block */ |
3488 | de = (struct ocfs2_dir_entry *) bh->b_data; | 3485 | de = (struct ocfs2_dir_entry *) bh->b_data; |
3489 | } | 3486 | } |
@@ -3513,7 +3510,6 @@ next: | |||
3513 | de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len)); | 3510 | de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len)); |
3514 | } | 3511 | } |
3515 | 3512 | ||
3516 | status = 0; | ||
3517 | bail: | 3513 | bail: |
3518 | brelse(bh); | 3514 | brelse(bh); |
3519 | if (status) | 3515 | if (status) |
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index b46278f9ae44..fd6bbbbd7d78 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c | |||
@@ -385,8 +385,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, | |||
385 | head = &res->granted; | 385 | head = &res->granted; |
386 | 386 | ||
387 | list_for_each_entry(lock, head, list) { | 387 | list_for_each_entry(lock, head, list) { |
388 | if (lock->ml.cookie == cookie) | 388 | /* if lock is found but unlock is pending ignore the bast */ |
389 | if (lock->ml.cookie == cookie) { | ||
390 | if (lock->unlock_pending) | ||
391 | break; | ||
389 | goto do_ast; | 392 | goto do_ast; |
393 | } | ||
390 | } | 394 | } |
391 | 395 | ||
392 | mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, " | 396 | mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, " |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 149eb556b8c6..825136070d2c 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
@@ -406,7 +406,7 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len) | |||
406 | } | 406 | } |
407 | spin_unlock(&dlm->spinlock); | 407 | spin_unlock(&dlm->spinlock); |
408 | 408 | ||
409 | out += snprintf(buf + out, len - out, "Total on list: %ld\n", total); | 409 | out += snprintf(buf + out, len - out, "Total on list: %lu\n", total); |
410 | 410 | ||
411 | return out; | 411 | return out; |
412 | } | 412 | } |
@@ -464,7 +464,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) | |||
464 | spin_unlock(&dlm->master_lock); | 464 | spin_unlock(&dlm->master_lock); |
465 | 465 | ||
466 | out += snprintf(buf + out, len - out, | 466 | out += snprintf(buf + out, len - out, |
467 | "Total: %ld, Longest: %ld\n", total, longest); | 467 | "Total: %lu, Longest: %lu\n", total, longest); |
468 | return out; | 468 | return out; |
469 | } | 469 | } |
470 | 470 | ||
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 50a59d2337b2..7df88a6dd626 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -674,20 +674,6 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm) | |||
674 | spin_unlock(&dlm->spinlock); | 674 | spin_unlock(&dlm->spinlock); |
675 | } | 675 | } |
676 | 676 | ||
677 | int dlm_joined(struct dlm_ctxt *dlm) | ||
678 | { | ||
679 | int ret = 0; | ||
680 | |||
681 | spin_lock(&dlm_domain_lock); | ||
682 | |||
683 | if (dlm->dlm_state == DLM_CTXT_JOINED) | ||
684 | ret = 1; | ||
685 | |||
686 | spin_unlock(&dlm_domain_lock); | ||
687 | |||
688 | return ret; | ||
689 | } | ||
690 | |||
691 | int dlm_shutting_down(struct dlm_ctxt *dlm) | 677 | int dlm_shutting_down(struct dlm_ctxt *dlm) |
692 | { | 678 | { |
693 | int ret = 0; | 679 | int ret = 0; |
diff --git a/fs/ocfs2/dlm/dlmdomain.h b/fs/ocfs2/dlm/dlmdomain.h index 2f7f60bfeb3b..fd6122a38dbd 100644 --- a/fs/ocfs2/dlm/dlmdomain.h +++ b/fs/ocfs2/dlm/dlmdomain.h | |||
@@ -28,7 +28,6 @@ | |||
28 | extern spinlock_t dlm_domain_lock; | 28 | extern spinlock_t dlm_domain_lock; |
29 | extern struct list_head dlm_domains; | 29 | extern struct list_head dlm_domains; |
30 | 30 | ||
31 | int dlm_joined(struct dlm_ctxt *dlm); | ||
32 | int dlm_shutting_down(struct dlm_ctxt *dlm); | 31 | int dlm_shutting_down(struct dlm_ctxt *dlm); |
33 | void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, | 32 | void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, |
34 | int node_num); | 33 | int node_num); |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index cecd875653e4..ce12e0b1a31f 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -1070,6 +1070,9 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm, | |||
1070 | dead_node, dlm->name); | 1070 | dead_node, dlm->name); |
1071 | list_del_init(&lock->list); | 1071 | list_del_init(&lock->list); |
1072 | dlm_lock_put(lock); | 1072 | dlm_lock_put(lock); |
1073 | /* Can't schedule DLM_UNLOCK_FREE_LOCK | ||
1074 | * - do manually */ | ||
1075 | dlm_lock_put(lock); | ||
1073 | break; | 1076 | break; |
1074 | } | 1077 | } |
1075 | } | 1078 | } |
@@ -2346,6 +2349,10 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
2346 | dead_node, dlm->name); | 2349 | dead_node, dlm->name); |
2347 | list_del_init(&lock->list); | 2350 | list_del_init(&lock->list); |
2348 | dlm_lock_put(lock); | 2351 | dlm_lock_put(lock); |
2352 | /* Can't schedule | ||
2353 | * DLM_UNLOCK_FREE_LOCK | ||
2354 | * - do manually */ | ||
2355 | dlm_lock_put(lock); | ||
2349 | break; | 2356 | break; |
2350 | } | 2357 | } |
2351 | } | 2358 | } |
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 57c40e34f56f..061ba6a91bf2 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c | |||
@@ -390,12 +390,6 @@ clear_fields: | |||
390 | ip->ip_conn = NULL; | 390 | ip->ip_conn = NULL; |
391 | } | 391 | } |
392 | 392 | ||
393 | static struct backing_dev_info dlmfs_backing_dev_info = { | ||
394 | .name = "ocfs2-dlmfs", | ||
395 | .ra_pages = 0, /* No readahead */ | ||
396 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | ||
397 | }; | ||
398 | |||
399 | static struct inode *dlmfs_get_root_inode(struct super_block *sb) | 393 | static struct inode *dlmfs_get_root_inode(struct super_block *sb) |
400 | { | 394 | { |
401 | struct inode *inode = new_inode(sb); | 395 | struct inode *inode = new_inode(sb); |
@@ -404,7 +398,6 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) | |||
404 | if (inode) { | 398 | if (inode) { |
405 | inode->i_ino = get_next_ino(); | 399 | inode->i_ino = get_next_ino(); |
406 | inode_init_owner(inode, NULL, mode); | 400 | inode_init_owner(inode, NULL, mode); |
407 | inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; | ||
408 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 401 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
409 | inc_nlink(inode); | 402 | inc_nlink(inode); |
410 | 403 | ||
@@ -428,7 +421,6 @@ static struct inode *dlmfs_get_inode(struct inode *parent, | |||
428 | 421 | ||
429 | inode->i_ino = get_next_ino(); | 422 | inode->i_ino = get_next_ino(); |
430 | inode_init_owner(inode, parent, mode); | 423 | inode_init_owner(inode, parent, mode); |
431 | inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; | ||
432 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 424 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
433 | 425 | ||
434 | ip = DLMFS_I(inode); | 426 | ip = DLMFS_I(inode); |
@@ -643,10 +635,6 @@ static int __init init_dlmfs_fs(void) | |||
643 | int status; | 635 | int status; |
644 | int cleanup_inode = 0, cleanup_worker = 0; | 636 | int cleanup_inode = 0, cleanup_worker = 0; |
645 | 637 | ||
646 | status = bdi_init(&dlmfs_backing_dev_info); | ||
647 | if (status) | ||
648 | return status; | ||
649 | |||
650 | dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", | 638 | dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", |
651 | sizeof(struct dlmfs_inode_private), | 639 | sizeof(struct dlmfs_inode_private), |
652 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| | 640 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| |
@@ -673,7 +661,6 @@ bail: | |||
673 | kmem_cache_destroy(dlmfs_inode_cache); | 661 | kmem_cache_destroy(dlmfs_inode_cache); |
674 | if (cleanup_worker) | 662 | if (cleanup_worker) |
675 | destroy_workqueue(user_dlm_worker); | 663 | destroy_workqueue(user_dlm_worker); |
676 | bdi_destroy(&dlmfs_backing_dev_info); | ||
677 | } else | 664 | } else |
678 | printk("OCFS2 User DLM kernel interface loaded\n"); | 665 | printk("OCFS2 User DLM kernel interface loaded\n"); |
679 | return status; | 666 | return status; |
@@ -693,7 +680,6 @@ static void __exit exit_dlmfs_fs(void) | |||
693 | rcu_barrier(); | 680 | rcu_barrier(); |
694 | kmem_cache_destroy(dlmfs_inode_cache); | 681 | kmem_cache_destroy(dlmfs_inode_cache); |
695 | 682 | ||
696 | bdi_destroy(&dlmfs_backing_dev_info); | ||
697 | } | 683 | } |
698 | 684 | ||
699 | MODULE_AUTHOR("Oracle"); | 685 | MODULE_AUTHOR("Oracle"); |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 1c423af04c69..11849a44dc5a 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -3750,6 +3750,9 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | |||
3750 | break; | 3750 | break; |
3751 | spin_unlock(&dentry_attach_lock); | 3751 | spin_unlock(&dentry_attach_lock); |
3752 | 3752 | ||
3753 | if (S_ISDIR(dl->dl_inode->i_mode)) | ||
3754 | shrink_dcache_parent(dentry); | ||
3755 | |||
3753 | mlog(0, "d_delete(%pd);\n", dentry); | 3756 | mlog(0, "d_delete(%pd);\n", dentry); |
3754 | 3757 | ||
3755 | /* | 3758 | /* |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 3950693dd0f6..46e0d4e857c7 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -295,7 +295,7 @@ out: | |||
295 | return ret; | 295 | return ret; |
296 | } | 296 | } |
297 | 297 | ||
298 | static int ocfs2_set_inode_size(handle_t *handle, | 298 | int ocfs2_set_inode_size(handle_t *handle, |
299 | struct inode *inode, | 299 | struct inode *inode, |
300 | struct buffer_head *fe_bh, | 300 | struct buffer_head *fe_bh, |
301 | u64 new_i_size) | 301 | u64 new_i_size) |
@@ -441,7 +441,7 @@ out: | |||
441 | return status; | 441 | return status; |
442 | } | 442 | } |
443 | 443 | ||
444 | static int ocfs2_truncate_file(struct inode *inode, | 444 | int ocfs2_truncate_file(struct inode *inode, |
445 | struct buffer_head *di_bh, | 445 | struct buffer_head *di_bh, |
446 | u64 new_i_size) | 446 | u64 new_i_size) |
447 | { | 447 | { |
@@ -569,7 +569,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | |||
569 | handle_t *handle = NULL; | 569 | handle_t *handle = NULL; |
570 | struct ocfs2_alloc_context *data_ac = NULL; | 570 | struct ocfs2_alloc_context *data_ac = NULL; |
571 | struct ocfs2_alloc_context *meta_ac = NULL; | 571 | struct ocfs2_alloc_context *meta_ac = NULL; |
572 | enum ocfs2_alloc_restarted why; | 572 | enum ocfs2_alloc_restarted why = RESTART_NONE; |
573 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 573 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
574 | struct ocfs2_extent_tree et; | 574 | struct ocfs2_extent_tree et; |
575 | int did_quota = 0; | 575 | int did_quota = 0; |
@@ -709,6 +709,13 @@ leave: | |||
709 | return status; | 709 | return status; |
710 | } | 710 | } |
711 | 711 | ||
712 | int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | ||
713 | u32 clusters_to_add, int mark_unwritten) | ||
714 | { | ||
715 | return __ocfs2_extend_allocation(inode, logical_start, | ||
716 | clusters_to_add, mark_unwritten); | ||
717 | } | ||
718 | |||
712 | /* | 719 | /* |
713 | * While a write will already be ordering the data, a truncate will not. | 720 | * While a write will already be ordering the data, a truncate will not. |
714 | * Thus, we need to explicitly order the zeroed pages. | 721 | * Thus, we need to explicitly order the zeroed pages. |
@@ -2109,6 +2116,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file, | |||
2109 | struct dentry *dentry = file->f_path.dentry; | 2116 | struct dentry *dentry = file->f_path.dentry; |
2110 | struct inode *inode = dentry->d_inode; | 2117 | struct inode *inode = dentry->d_inode; |
2111 | loff_t saved_pos = 0, end; | 2118 | loff_t saved_pos = 0, end; |
2119 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
2120 | int full_coherency = !(osb->s_mount_opt & | ||
2121 | OCFS2_MOUNT_COHERENCY_BUFFERED); | ||
2112 | 2122 | ||
2113 | /* | 2123 | /* |
2114 | * We start with a read level meta lock and only jump to an ex | 2124 | * We start with a read level meta lock and only jump to an ex |
@@ -2197,7 +2207,16 @@ static int ocfs2_prepare_inode_for_write(struct file *file, | |||
2197 | * one node could wind up truncating another | 2207 | * one node could wind up truncating another |
2198 | * nodes writes. | 2208 | * nodes writes. |
2199 | */ | 2209 | */ |
2200 | if (end > i_size_read(inode)) { | 2210 | if (end > i_size_read(inode) && !full_coherency) { |
2211 | *direct_io = 0; | ||
2212 | break; | ||
2213 | } | ||
2214 | |||
2215 | /* | ||
2216 | * Fallback to old way if the feature bit is not set. | ||
2217 | */ | ||
2218 | if (end > i_size_read(inode) && | ||
2219 | !ocfs2_supports_append_dio(osb)) { | ||
2201 | *direct_io = 0; | 2220 | *direct_io = 0; |
2202 | break; | 2221 | break; |
2203 | } | 2222 | } |
@@ -2210,7 +2229,13 @@ static int ocfs2_prepare_inode_for_write(struct file *file, | |||
2210 | */ | 2229 | */ |
2211 | ret = ocfs2_check_range_for_holes(inode, saved_pos, count); | 2230 | ret = ocfs2_check_range_for_holes(inode, saved_pos, count); |
2212 | if (ret == 1) { | 2231 | if (ret == 1) { |
2213 | *direct_io = 0; | 2232 | /* |
2233 | * Fallback to old way if the feature bit is not set. | ||
2234 | * Otherwise try dio first and then complete the rest | ||
2235 | * request through buffer io. | ||
2236 | */ | ||
2237 | if (!ocfs2_supports_append_dio(osb)) | ||
2238 | *direct_io = 0; | ||
2214 | ret = 0; | 2239 | ret = 0; |
2215 | } else if (ret < 0) | 2240 | } else if (ret < 0) |
2216 | mlog_errno(ret); | 2241 | mlog_errno(ret); |
@@ -2243,6 +2268,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, | |||
2243 | u32 old_clusters; | 2268 | u32 old_clusters; |
2244 | struct file *file = iocb->ki_filp; | 2269 | struct file *file = iocb->ki_filp; |
2245 | struct inode *inode = file_inode(file); | 2270 | struct inode *inode = file_inode(file); |
2271 | struct address_space *mapping = file->f_mapping; | ||
2246 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2272 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2247 | int full_coherency = !(osb->s_mount_opt & | 2273 | int full_coherency = !(osb->s_mount_opt & |
2248 | OCFS2_MOUNT_COHERENCY_BUFFERED); | 2274 | OCFS2_MOUNT_COHERENCY_BUFFERED); |
@@ -2357,13 +2383,53 @@ relock: | |||
2357 | 2383 | ||
2358 | iov_iter_truncate(from, count); | 2384 | iov_iter_truncate(from, count); |
2359 | if (direct_io) { | 2385 | if (direct_io) { |
2386 | loff_t endbyte; | ||
2387 | ssize_t written_buffered; | ||
2360 | written = generic_file_direct_write(iocb, from, *ppos); | 2388 | written = generic_file_direct_write(iocb, from, *ppos); |
2361 | if (written < 0) { | 2389 | if (written < 0 || written == count) { |
2362 | ret = written; | 2390 | ret = written; |
2363 | goto out_dio; | 2391 | goto out_dio; |
2364 | } | 2392 | } |
2393 | |||
2394 | /* | ||
2395 | * for completing the rest of the request. | ||
2396 | */ | ||
2397 | *ppos += written; | ||
2398 | count -= written; | ||
2399 | written_buffered = generic_perform_write(file, from, *ppos); | ||
2400 | /* | ||
2401 | * If generic_file_buffered_write() returned a synchronous error | ||
2402 | * then we want to return the number of bytes which were | ||
2403 | * direct-written, or the error code if that was zero. Note | ||
2404 | * that this differs from normal direct-io semantics, which | ||
2405 | * will return -EFOO even if some bytes were written. | ||
2406 | */ | ||
2407 | if (written_buffered < 0) { | ||
2408 | ret = written_buffered; | ||
2409 | goto out_dio; | ||
2410 | } | ||
2411 | |||
2412 | iocb->ki_pos = *ppos + written_buffered; | ||
2413 | /* We need to ensure that the page cache pages are written to | ||
2414 | * disk and invalidated to preserve the expected O_DIRECT | ||
2415 | * semantics. | ||
2416 | */ | ||
2417 | endbyte = *ppos + written_buffered - 1; | ||
2418 | ret = filemap_write_and_wait_range(file->f_mapping, *ppos, | ||
2419 | endbyte); | ||
2420 | if (ret == 0) { | ||
2421 | written += written_buffered; | ||
2422 | invalidate_mapping_pages(mapping, | ||
2423 | *ppos >> PAGE_CACHE_SHIFT, | ||
2424 | endbyte >> PAGE_CACHE_SHIFT); | ||
2425 | } else { | ||
2426 | /* | ||
2427 | * We don't know how much we wrote, so just return | ||
2428 | * the number of bytes which were direct-written | ||
2429 | */ | ||
2430 | } | ||
2365 | } else { | 2431 | } else { |
2366 | current->backing_dev_info = file->f_mapping->backing_dev_info; | 2432 | current->backing_dev_info = inode_to_bdi(inode); |
2367 | written = generic_perform_write(file, from, *ppos); | 2433 | written = generic_perform_write(file, from, *ppos); |
2368 | if (likely(written >= 0)) | 2434 | if (likely(written >= 0)) |
2369 | iocb->ki_pos = *ppos + written; | 2435 | iocb->ki_pos = *ppos + written; |
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 97bf761c9e7c..e8c62f22215c 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h | |||
@@ -51,13 +51,22 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb, | |||
51 | struct ocfs2_alloc_context *data_ac, | 51 | struct ocfs2_alloc_context *data_ac, |
52 | struct ocfs2_alloc_context *meta_ac, | 52 | struct ocfs2_alloc_context *meta_ac, |
53 | enum ocfs2_alloc_restarted *reason_ret); | 53 | enum ocfs2_alloc_restarted *reason_ret); |
54 | int ocfs2_set_inode_size(handle_t *handle, | ||
55 | struct inode *inode, | ||
56 | struct buffer_head *fe_bh, | ||
57 | u64 new_i_size); | ||
54 | int ocfs2_simple_size_update(struct inode *inode, | 58 | int ocfs2_simple_size_update(struct inode *inode, |
55 | struct buffer_head *di_bh, | 59 | struct buffer_head *di_bh, |
56 | u64 new_i_size); | 60 | u64 new_i_size); |
61 | int ocfs2_truncate_file(struct inode *inode, | ||
62 | struct buffer_head *di_bh, | ||
63 | u64 new_i_size); | ||
57 | int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, | 64 | int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, |
58 | u64 new_i_size, u64 zero_to); | 65 | u64 new_i_size, u64 zero_to); |
59 | int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, | 66 | int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, |
60 | loff_t zero_to); | 67 | loff_t zero_to); |
68 | int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | ||
69 | u32 clusters_to_add, int mark_unwritten); | ||
61 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); | 70 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); |
62 | int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, | 71 | int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, |
63 | struct kstat *stat); | 72 | struct kstat *stat); |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index c8b25de9efbb..3025c0da6b8a 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -648,7 +648,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
648 | 648 | ||
649 | if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { | 649 | if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { |
650 | status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, | 650 | status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, |
651 | orphan_dir_bh); | 651 | orphan_dir_bh, false); |
652 | if (status < 0) { | 652 | if (status < 0) { |
653 | mlog_errno(status); | 653 | mlog_errno(status); |
654 | goto bail_commit; | 654 | goto bail_commit; |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index ca3431ee7f24..5e86b247c821 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -81,6 +81,8 @@ struct ocfs2_inode_info | |||
81 | tid_t i_sync_tid; | 81 | tid_t i_sync_tid; |
82 | tid_t i_datasync_tid; | 82 | tid_t i_datasync_tid; |
83 | 83 | ||
84 | wait_queue_head_t append_dio_wq; | ||
85 | |||
84 | struct dquot *i_dquot[MAXQUOTAS]; | 86 | struct dquot *i_dquot[MAXQUOTAS]; |
85 | }; | 87 | }; |
86 | 88 | ||
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 4f502382180f..ff531928269e 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -50,6 +50,8 @@ | |||
50 | #include "sysfile.h" | 50 | #include "sysfile.h" |
51 | #include "uptodate.h" | 51 | #include "uptodate.h" |
52 | #include "quota.h" | 52 | #include "quota.h" |
53 | #include "file.h" | ||
54 | #include "namei.h" | ||
53 | 55 | ||
54 | #include "buffer_head_io.h" | 56 | #include "buffer_head_io.h" |
55 | #include "ocfs2_trace.h" | 57 | #include "ocfs2_trace.h" |
@@ -69,13 +71,15 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, | |||
69 | static int ocfs2_trylock_journal(struct ocfs2_super *osb, | 71 | static int ocfs2_trylock_journal(struct ocfs2_super *osb, |
70 | int slot_num); | 72 | int slot_num); |
71 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, | 73 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, |
72 | int slot); | 74 | int slot, |
75 | enum ocfs2_orphan_reco_type orphan_reco_type); | ||
73 | static int ocfs2_commit_thread(void *arg); | 76 | static int ocfs2_commit_thread(void *arg); |
74 | static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, | 77 | static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, |
75 | int slot_num, | 78 | int slot_num, |
76 | struct ocfs2_dinode *la_dinode, | 79 | struct ocfs2_dinode *la_dinode, |
77 | struct ocfs2_dinode *tl_dinode, | 80 | struct ocfs2_dinode *tl_dinode, |
78 | struct ocfs2_quota_recovery *qrec); | 81 | struct ocfs2_quota_recovery *qrec, |
82 | enum ocfs2_orphan_reco_type orphan_reco_type); | ||
79 | 83 | ||
80 | static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb) | 84 | static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb) |
81 | { | 85 | { |
@@ -149,7 +153,8 @@ int ocfs2_compute_replay_slots(struct ocfs2_super *osb) | |||
149 | return 0; | 153 | return 0; |
150 | } | 154 | } |
151 | 155 | ||
152 | void ocfs2_queue_replay_slots(struct ocfs2_super *osb) | 156 | void ocfs2_queue_replay_slots(struct ocfs2_super *osb, |
157 | enum ocfs2_orphan_reco_type orphan_reco_type) | ||
153 | { | 158 | { |
154 | struct ocfs2_replay_map *replay_map = osb->replay_map; | 159 | struct ocfs2_replay_map *replay_map = osb->replay_map; |
155 | int i; | 160 | int i; |
@@ -163,7 +168,8 @@ void ocfs2_queue_replay_slots(struct ocfs2_super *osb) | |||
163 | for (i = 0; i < replay_map->rm_slots; i++) | 168 | for (i = 0; i < replay_map->rm_slots; i++) |
164 | if (replay_map->rm_replay_slots[i]) | 169 | if (replay_map->rm_replay_slots[i]) |
165 | ocfs2_queue_recovery_completion(osb->journal, i, NULL, | 170 | ocfs2_queue_recovery_completion(osb->journal, i, NULL, |
166 | NULL, NULL); | 171 | NULL, NULL, |
172 | orphan_reco_type); | ||
167 | replay_map->rm_state = REPLAY_DONE; | 173 | replay_map->rm_state = REPLAY_DONE; |
168 | } | 174 | } |
169 | 175 | ||
@@ -1174,6 +1180,7 @@ struct ocfs2_la_recovery_item { | |||
1174 | struct ocfs2_dinode *lri_la_dinode; | 1180 | struct ocfs2_dinode *lri_la_dinode; |
1175 | struct ocfs2_dinode *lri_tl_dinode; | 1181 | struct ocfs2_dinode *lri_tl_dinode; |
1176 | struct ocfs2_quota_recovery *lri_qrec; | 1182 | struct ocfs2_quota_recovery *lri_qrec; |
1183 | enum ocfs2_orphan_reco_type lri_orphan_reco_type; | ||
1177 | }; | 1184 | }; |
1178 | 1185 | ||
1179 | /* Does the second half of the recovery process. By this point, the | 1186 | /* Does the second half of the recovery process. By this point, the |
@@ -1195,6 +1202,7 @@ void ocfs2_complete_recovery(struct work_struct *work) | |||
1195 | struct ocfs2_dinode *la_dinode, *tl_dinode; | 1202 | struct ocfs2_dinode *la_dinode, *tl_dinode; |
1196 | struct ocfs2_la_recovery_item *item, *n; | 1203 | struct ocfs2_la_recovery_item *item, *n; |
1197 | struct ocfs2_quota_recovery *qrec; | 1204 | struct ocfs2_quota_recovery *qrec; |
1205 | enum ocfs2_orphan_reco_type orphan_reco_type; | ||
1198 | LIST_HEAD(tmp_la_list); | 1206 | LIST_HEAD(tmp_la_list); |
1199 | 1207 | ||
1200 | trace_ocfs2_complete_recovery( | 1208 | trace_ocfs2_complete_recovery( |
@@ -1212,6 +1220,7 @@ void ocfs2_complete_recovery(struct work_struct *work) | |||
1212 | la_dinode = item->lri_la_dinode; | 1220 | la_dinode = item->lri_la_dinode; |
1213 | tl_dinode = item->lri_tl_dinode; | 1221 | tl_dinode = item->lri_tl_dinode; |
1214 | qrec = item->lri_qrec; | 1222 | qrec = item->lri_qrec; |
1223 | orphan_reco_type = item->lri_orphan_reco_type; | ||
1215 | 1224 | ||
1216 | trace_ocfs2_complete_recovery_slot(item->lri_slot, | 1225 | trace_ocfs2_complete_recovery_slot(item->lri_slot, |
1217 | la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0, | 1226 | la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0, |
@@ -1236,7 +1245,8 @@ void ocfs2_complete_recovery(struct work_struct *work) | |||
1236 | kfree(tl_dinode); | 1245 | kfree(tl_dinode); |
1237 | } | 1246 | } |
1238 | 1247 | ||
1239 | ret = ocfs2_recover_orphans(osb, item->lri_slot); | 1248 | ret = ocfs2_recover_orphans(osb, item->lri_slot, |
1249 | orphan_reco_type); | ||
1240 | if (ret < 0) | 1250 | if (ret < 0) |
1241 | mlog_errno(ret); | 1251 | mlog_errno(ret); |
1242 | 1252 | ||
@@ -1261,7 +1271,8 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, | |||
1261 | int slot_num, | 1271 | int slot_num, |
1262 | struct ocfs2_dinode *la_dinode, | 1272 | struct ocfs2_dinode *la_dinode, |
1263 | struct ocfs2_dinode *tl_dinode, | 1273 | struct ocfs2_dinode *tl_dinode, |
1264 | struct ocfs2_quota_recovery *qrec) | 1274 | struct ocfs2_quota_recovery *qrec, |
1275 | enum ocfs2_orphan_reco_type orphan_reco_type) | ||
1265 | { | 1276 | { |
1266 | struct ocfs2_la_recovery_item *item; | 1277 | struct ocfs2_la_recovery_item *item; |
1267 | 1278 | ||
@@ -1285,6 +1296,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, | |||
1285 | item->lri_slot = slot_num; | 1296 | item->lri_slot = slot_num; |
1286 | item->lri_tl_dinode = tl_dinode; | 1297 | item->lri_tl_dinode = tl_dinode; |
1287 | item->lri_qrec = qrec; | 1298 | item->lri_qrec = qrec; |
1299 | item->lri_orphan_reco_type = orphan_reco_type; | ||
1288 | 1300 | ||
1289 | spin_lock(&journal->j_lock); | 1301 | spin_lock(&journal->j_lock); |
1290 | list_add_tail(&item->lri_list, &journal->j_la_cleanups); | 1302 | list_add_tail(&item->lri_list, &journal->j_la_cleanups); |
@@ -1304,7 +1316,8 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) | |||
1304 | /* No need to queue up our truncate_log as regular cleanup will catch | 1316 | /* No need to queue up our truncate_log as regular cleanup will catch |
1305 | * that */ | 1317 | * that */ |
1306 | ocfs2_queue_recovery_completion(journal, osb->slot_num, | 1318 | ocfs2_queue_recovery_completion(journal, osb->slot_num, |
1307 | osb->local_alloc_copy, NULL, NULL); | 1319 | osb->local_alloc_copy, NULL, NULL, |
1320 | ORPHAN_NEED_TRUNCATE); | ||
1308 | ocfs2_schedule_truncate_log_flush(osb, 0); | 1321 | ocfs2_schedule_truncate_log_flush(osb, 0); |
1309 | 1322 | ||
1310 | osb->local_alloc_copy = NULL; | 1323 | osb->local_alloc_copy = NULL; |
@@ -1312,7 +1325,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) | |||
1312 | 1325 | ||
1313 | /* queue to recover orphan slots for all offline slots */ | 1326 | /* queue to recover orphan slots for all offline slots */ |
1314 | ocfs2_replay_map_set_state(osb, REPLAY_NEEDED); | 1327 | ocfs2_replay_map_set_state(osb, REPLAY_NEEDED); |
1315 | ocfs2_queue_replay_slots(osb); | 1328 | ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE); |
1316 | ocfs2_free_replay_slots(osb); | 1329 | ocfs2_free_replay_slots(osb); |
1317 | } | 1330 | } |
1318 | 1331 | ||
@@ -1323,7 +1336,8 @@ void ocfs2_complete_quota_recovery(struct ocfs2_super *osb) | |||
1323 | osb->slot_num, | 1336 | osb->slot_num, |
1324 | NULL, | 1337 | NULL, |
1325 | NULL, | 1338 | NULL, |
1326 | osb->quota_rec); | 1339 | osb->quota_rec, |
1340 | ORPHAN_NEED_TRUNCATE); | ||
1327 | osb->quota_rec = NULL; | 1341 | osb->quota_rec = NULL; |
1328 | } | 1342 | } |
1329 | } | 1343 | } |
@@ -1360,7 +1374,7 @@ restart: | |||
1360 | 1374 | ||
1361 | /* queue recovery for our own slot */ | 1375 | /* queue recovery for our own slot */ |
1362 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, | 1376 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, |
1363 | NULL, NULL); | 1377 | NULL, NULL, ORPHAN_NO_NEED_TRUNCATE); |
1364 | 1378 | ||
1365 | spin_lock(&osb->osb_lock); | 1379 | spin_lock(&osb->osb_lock); |
1366 | while (rm->rm_used) { | 1380 | while (rm->rm_used) { |
@@ -1419,13 +1433,14 @@ skip_recovery: | |||
1419 | continue; | 1433 | continue; |
1420 | } | 1434 | } |
1421 | ocfs2_queue_recovery_completion(osb->journal, rm_quota[i], | 1435 | ocfs2_queue_recovery_completion(osb->journal, rm_quota[i], |
1422 | NULL, NULL, qrec); | 1436 | NULL, NULL, qrec, |
1437 | ORPHAN_NEED_TRUNCATE); | ||
1423 | } | 1438 | } |
1424 | 1439 | ||
1425 | ocfs2_super_unlock(osb, 1); | 1440 | ocfs2_super_unlock(osb, 1); |
1426 | 1441 | ||
1427 | /* queue recovery for offline slots */ | 1442 | /* queue recovery for offline slots */ |
1428 | ocfs2_queue_replay_slots(osb); | 1443 | ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE); |
1429 | 1444 | ||
1430 | bail: | 1445 | bail: |
1431 | mutex_lock(&osb->recovery_lock); | 1446 | mutex_lock(&osb->recovery_lock); |
@@ -1447,7 +1462,6 @@ bail: | |||
1447 | * requires that we call do_exit(). And it isn't exported, but | 1462 | * requires that we call do_exit(). And it isn't exported, but |
1448 | * complete_and_exit() seems to be a minimal wrapper around it. */ | 1463 | * complete_and_exit() seems to be a minimal wrapper around it. */ |
1449 | complete_and_exit(NULL, status); | 1464 | complete_and_exit(NULL, status); |
1450 | return status; | ||
1451 | } | 1465 | } |
1452 | 1466 | ||
1453 | void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) | 1467 | void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) |
@@ -1712,7 +1726,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, | |||
1712 | 1726 | ||
1713 | /* This will kfree the memory pointed to by la_copy and tl_copy */ | 1727 | /* This will kfree the memory pointed to by la_copy and tl_copy */ |
1714 | ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy, | 1728 | ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy, |
1715 | tl_copy, NULL); | 1729 | tl_copy, NULL, ORPHAN_NEED_TRUNCATE); |
1716 | 1730 | ||
1717 | status = 0; | 1731 | status = 0; |
1718 | done: | 1732 | done: |
@@ -1902,7 +1916,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) | |||
1902 | 1916 | ||
1903 | for (i = 0; i < osb->max_slots; i++) | 1917 | for (i = 0; i < osb->max_slots; i++) |
1904 | ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL, | 1918 | ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL, |
1905 | NULL); | 1919 | NULL, ORPHAN_NO_NEED_TRUNCATE); |
1906 | /* | 1920 | /* |
1907 | * We queued a recovery on orphan slots, increment the sequence | 1921 | * We queued a recovery on orphan slots, increment the sequence |
1908 | * number and update LVB so other node will skip the scan for a while | 1922 | * number and update LVB so other node will skip the scan for a while |
@@ -2001,6 +2015,13 @@ static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name, | |||
2001 | if (IS_ERR(iter)) | 2015 | if (IS_ERR(iter)) |
2002 | return 0; | 2016 | return 0; |
2003 | 2017 | ||
2018 | /* Skip inodes which are already added to recover list, since dio may | ||
2019 | * happen concurrently with unlink/rename */ | ||
2020 | if (OCFS2_I(iter)->ip_next_orphan) { | ||
2021 | iput(iter); | ||
2022 | return 0; | ||
2023 | } | ||
2024 | |||
2004 | trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno); | 2025 | trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno); |
2005 | /* No locking is required for the next_orphan queue as there | 2026 | /* No locking is required for the next_orphan queue as there |
2006 | * is only ever a single process doing orphan recovery. */ | 2027 | * is only ever a single process doing orphan recovery. */ |
@@ -2109,7 +2130,8 @@ static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb, | |||
2109 | * advertising our state to ocfs2_delete_inode(). | 2130 | * advertising our state to ocfs2_delete_inode(). |
2110 | */ | 2131 | */ |
2111 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, | 2132 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, |
2112 | int slot) | 2133 | int slot, |
2134 | enum ocfs2_orphan_reco_type orphan_reco_type) | ||
2113 | { | 2135 | { |
2114 | int ret = 0; | 2136 | int ret = 0; |
2115 | struct inode *inode = NULL; | 2137 | struct inode *inode = NULL; |
@@ -2133,13 +2155,60 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
2133 | (unsigned long long)oi->ip_blkno); | 2155 | (unsigned long long)oi->ip_blkno); |
2134 | 2156 | ||
2135 | iter = oi->ip_next_orphan; | 2157 | iter = oi->ip_next_orphan; |
2158 | oi->ip_next_orphan = NULL; | ||
2159 | |||
2160 | /* | ||
2161 | * We need to take and drop the inode lock to | ||
2162 | * force read inode from disk. | ||
2163 | */ | ||
2164 | ret = ocfs2_inode_lock(inode, NULL, 0); | ||
2165 | if (ret) { | ||
2166 | mlog_errno(ret); | ||
2167 | goto next; | ||
2168 | } | ||
2169 | ocfs2_inode_unlock(inode, 0); | ||
2170 | |||
2171 | if (inode->i_nlink == 0) { | ||
2172 | spin_lock(&oi->ip_lock); | ||
2173 | /* Set the proper information to get us going into | ||
2174 | * ocfs2_delete_inode. */ | ||
2175 | oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; | ||
2176 | spin_unlock(&oi->ip_lock); | ||
2177 | } else if (orphan_reco_type == ORPHAN_NEED_TRUNCATE) { | ||
2178 | struct buffer_head *di_bh = NULL; | ||
2179 | |||
2180 | ret = ocfs2_rw_lock(inode, 1); | ||
2181 | if (ret) { | ||
2182 | mlog_errno(ret); | ||
2183 | goto next; | ||
2184 | } | ||
2185 | |||
2186 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | ||
2187 | if (ret < 0) { | ||
2188 | ocfs2_rw_unlock(inode, 1); | ||
2189 | mlog_errno(ret); | ||
2190 | goto next; | ||
2191 | } | ||
2192 | |||
2193 | ret = ocfs2_truncate_file(inode, di_bh, | ||
2194 | i_size_read(inode)); | ||
2195 | ocfs2_inode_unlock(inode, 1); | ||
2196 | ocfs2_rw_unlock(inode, 1); | ||
2197 | brelse(di_bh); | ||
2198 | if (ret < 0) { | ||
2199 | if (ret != -ENOSPC) | ||
2200 | mlog_errno(ret); | ||
2201 | goto next; | ||
2202 | } | ||
2203 | |||
2204 | ret = ocfs2_del_inode_from_orphan(osb, inode, 0, 0); | ||
2205 | if (ret) | ||
2206 | mlog_errno(ret); | ||
2136 | 2207 | ||
2137 | spin_lock(&oi->ip_lock); | 2208 | wake_up(&OCFS2_I(inode)->append_dio_wq); |
2138 | /* Set the proper information to get us going into | 2209 | } /* else if ORPHAN_NO_NEED_TRUNCATE, do nothing */ |
2139 | * ocfs2_delete_inode. */ | ||
2140 | oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; | ||
2141 | spin_unlock(&oi->ip_lock); | ||
2142 | 2210 | ||
2211 | next: | ||
2143 | iput(inode); | 2212 | iput(inode); |
2144 | 2213 | ||
2145 | inode = iter; | 2214 | inode = iter; |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 7f8cde94abfe..f4cd3c3e9fb7 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -472,6 +472,11 @@ static inline int ocfs2_unlink_credits(struct super_block *sb) | |||
472 | * orphan dir index leaf */ | 472 | * orphan dir index leaf */ |
473 | #define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 4) | 473 | #define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 4) |
474 | 474 | ||
475 | /* dinode + orphan dir dinode + extent tree leaf block + orphan dir entry + | ||
476 | * orphan dir index root + orphan dir index leaf */ | ||
477 | #define OCFS2_INODE_ADD_TO_ORPHAN_CREDITS (2 * OCFS2_INODE_UPDATE_CREDITS + 4) | ||
478 | #define OCFS2_INODE_DEL_FROM_ORPHAN_CREDITS OCFS2_INODE_ADD_TO_ORPHAN_CREDITS | ||
479 | |||
475 | /* dinode update, old dir dinode update, new dir dinode update, old | 480 | /* dinode update, old dir dinode update, new dir dinode update, old |
476 | * dir dir entry, new dir dir entry, dir entry update for renaming | 481 | * dir dir entry, new dir dir entry, dir entry update for renaming |
477 | * directory + target unlink + 3 x dir index leaves */ | 482 | * directory + target unlink + 3 x dir index leaves */ |
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 10d66c75cecb..9581d190f6e1 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -173,7 +173,6 @@ out: | |||
173 | static const struct vm_operations_struct ocfs2_file_vm_ops = { | 173 | static const struct vm_operations_struct ocfs2_file_vm_ops = { |
174 | .fault = ocfs2_fault, | 174 | .fault = ocfs2_fault, |
175 | .page_mkwrite = ocfs2_page_mkwrite, | 175 | .page_mkwrite = ocfs2_page_mkwrite, |
176 | .remap_pages = generic_file_remap_pages, | ||
177 | }; | 176 | }; |
178 | 177 | ||
179 | int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) | 178 | int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 914c121ec890..b5c3a5ea3ee6 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -79,7 +79,8 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
79 | struct inode **ret_orphan_dir, | 79 | struct inode **ret_orphan_dir, |
80 | u64 blkno, | 80 | u64 blkno, |
81 | char *name, | 81 | char *name, |
82 | struct ocfs2_dir_lookup_result *lookup); | 82 | struct ocfs2_dir_lookup_result *lookup, |
83 | bool dio); | ||
83 | 84 | ||
84 | static int ocfs2_orphan_add(struct ocfs2_super *osb, | 85 | static int ocfs2_orphan_add(struct ocfs2_super *osb, |
85 | handle_t *handle, | 86 | handle_t *handle, |
@@ -87,7 +88,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
87 | struct buffer_head *fe_bh, | 88 | struct buffer_head *fe_bh, |
88 | char *name, | 89 | char *name, |
89 | struct ocfs2_dir_lookup_result *lookup, | 90 | struct ocfs2_dir_lookup_result *lookup, |
90 | struct inode *orphan_dir_inode); | 91 | struct inode *orphan_dir_inode, |
92 | bool dio); | ||
91 | 93 | ||
92 | static int ocfs2_create_symlink_data(struct ocfs2_super *osb, | 94 | static int ocfs2_create_symlink_data(struct ocfs2_super *osb, |
93 | handle_t *handle, | 95 | handle_t *handle, |
@@ -104,6 +106,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, | |||
104 | static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2); | 106 | static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2); |
105 | /* An orphan dir name is an 8 byte value, printed as a hex string */ | 107 | /* An orphan dir name is an 8 byte value, printed as a hex string */ |
106 | #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64))) | 108 | #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64))) |
109 | #define OCFS2_DIO_ORPHAN_PREFIX "dio-" | ||
110 | #define OCFS2_DIO_ORPHAN_PREFIX_LEN 4 | ||
107 | 111 | ||
108 | static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, | 112 | static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, |
109 | unsigned int flags) | 113 | unsigned int flags) |
@@ -952,7 +956,8 @@ static int ocfs2_unlink(struct inode *dir, | |||
952 | if (ocfs2_inode_is_unlinkable(inode)) { | 956 | if (ocfs2_inode_is_unlinkable(inode)) { |
953 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, | 957 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, |
954 | OCFS2_I(inode)->ip_blkno, | 958 | OCFS2_I(inode)->ip_blkno, |
955 | orphan_name, &orphan_insert); | 959 | orphan_name, &orphan_insert, |
960 | false); | ||
956 | if (status < 0) { | 961 | if (status < 0) { |
957 | mlog_errno(status); | 962 | mlog_errno(status); |
958 | goto leave; | 963 | goto leave; |
@@ -1004,7 +1009,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
1004 | 1009 | ||
1005 | if (is_unlinkable) { | 1010 | if (is_unlinkable) { |
1006 | status = ocfs2_orphan_add(osb, handle, inode, fe_bh, | 1011 | status = ocfs2_orphan_add(osb, handle, inode, fe_bh, |
1007 | orphan_name, &orphan_insert, orphan_dir); | 1012 | orphan_name, &orphan_insert, orphan_dir, false); |
1008 | if (status < 0) | 1013 | if (status < 0) |
1009 | mlog_errno(status); | 1014 | mlog_errno(status); |
1010 | } | 1015 | } |
@@ -1440,7 +1445,8 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1440 | if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) { | 1445 | if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) { |
1441 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, | 1446 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, |
1442 | OCFS2_I(new_inode)->ip_blkno, | 1447 | OCFS2_I(new_inode)->ip_blkno, |
1443 | orphan_name, &orphan_insert); | 1448 | orphan_name, &orphan_insert, |
1449 | false); | ||
1444 | if (status < 0) { | 1450 | if (status < 0) { |
1445 | mlog_errno(status); | 1451 | mlog_errno(status); |
1446 | goto bail; | 1452 | goto bail; |
@@ -1507,7 +1513,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1507 | if (should_add_orphan) { | 1513 | if (should_add_orphan) { |
1508 | status = ocfs2_orphan_add(osb, handle, new_inode, | 1514 | status = ocfs2_orphan_add(osb, handle, new_inode, |
1509 | newfe_bh, orphan_name, | 1515 | newfe_bh, orphan_name, |
1510 | &orphan_insert, orphan_dir); | 1516 | &orphan_insert, orphan_dir, false); |
1511 | if (status < 0) { | 1517 | if (status < 0) { |
1512 | mlog_errno(status); | 1518 | mlog_errno(status); |
1513 | goto bail; | 1519 | goto bail; |
@@ -2088,12 +2094,28 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode, | |||
2088 | struct buffer_head *orphan_dir_bh, | 2094 | struct buffer_head *orphan_dir_bh, |
2089 | u64 blkno, | 2095 | u64 blkno, |
2090 | char *name, | 2096 | char *name, |
2091 | struct ocfs2_dir_lookup_result *lookup) | 2097 | struct ocfs2_dir_lookup_result *lookup, |
2098 | bool dio) | ||
2092 | { | 2099 | { |
2093 | int ret; | 2100 | int ret; |
2094 | struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb); | 2101 | struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb); |
2102 | int namelen = dio ? | ||
2103 | (OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN) : | ||
2104 | OCFS2_ORPHAN_NAMELEN; | ||
2105 | |||
2106 | if (dio) { | ||
2107 | ret = snprintf(name, OCFS2_DIO_ORPHAN_PREFIX_LEN + 1, "%s", | ||
2108 | OCFS2_DIO_ORPHAN_PREFIX); | ||
2109 | if (ret != OCFS2_DIO_ORPHAN_PREFIX_LEN) { | ||
2110 | ret = -EINVAL; | ||
2111 | mlog_errno(ret); | ||
2112 | return ret; | ||
2113 | } | ||
2095 | 2114 | ||
2096 | ret = ocfs2_blkno_stringify(blkno, name); | 2115 | ret = ocfs2_blkno_stringify(blkno, |
2116 | name + OCFS2_DIO_ORPHAN_PREFIX_LEN); | ||
2117 | } else | ||
2118 | ret = ocfs2_blkno_stringify(blkno, name); | ||
2097 | if (ret < 0) { | 2119 | if (ret < 0) { |
2098 | mlog_errno(ret); | 2120 | mlog_errno(ret); |
2099 | return ret; | 2121 | return ret; |
@@ -2101,7 +2123,7 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode, | |||
2101 | 2123 | ||
2102 | ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, | 2124 | ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, |
2103 | orphan_dir_bh, name, | 2125 | orphan_dir_bh, name, |
2104 | OCFS2_ORPHAN_NAMELEN, lookup); | 2126 | namelen, lookup); |
2105 | if (ret < 0) { | 2127 | if (ret < 0) { |
2106 | mlog_errno(ret); | 2128 | mlog_errno(ret); |
2107 | return ret; | 2129 | return ret; |
@@ -2128,7 +2150,8 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
2128 | struct inode **ret_orphan_dir, | 2150 | struct inode **ret_orphan_dir, |
2129 | u64 blkno, | 2151 | u64 blkno, |
2130 | char *name, | 2152 | char *name, |
2131 | struct ocfs2_dir_lookup_result *lookup) | 2153 | struct ocfs2_dir_lookup_result *lookup, |
2154 | bool dio) | ||
2132 | { | 2155 | { |
2133 | struct inode *orphan_dir_inode = NULL; | 2156 | struct inode *orphan_dir_inode = NULL; |
2134 | struct buffer_head *orphan_dir_bh = NULL; | 2157 | struct buffer_head *orphan_dir_bh = NULL; |
@@ -2142,7 +2165,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
2142 | } | 2165 | } |
2143 | 2166 | ||
2144 | ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh, | 2167 | ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh, |
2145 | blkno, name, lookup); | 2168 | blkno, name, lookup, dio); |
2146 | if (ret < 0) { | 2169 | if (ret < 0) { |
2147 | mlog_errno(ret); | 2170 | mlog_errno(ret); |
2148 | goto out; | 2171 | goto out; |
@@ -2170,12 +2193,16 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
2170 | struct buffer_head *fe_bh, | 2193 | struct buffer_head *fe_bh, |
2171 | char *name, | 2194 | char *name, |
2172 | struct ocfs2_dir_lookup_result *lookup, | 2195 | struct ocfs2_dir_lookup_result *lookup, |
2173 | struct inode *orphan_dir_inode) | 2196 | struct inode *orphan_dir_inode, |
2197 | bool dio) | ||
2174 | { | 2198 | { |
2175 | struct buffer_head *orphan_dir_bh = NULL; | 2199 | struct buffer_head *orphan_dir_bh = NULL; |
2176 | int status = 0; | 2200 | int status = 0; |
2177 | struct ocfs2_dinode *orphan_fe; | 2201 | struct ocfs2_dinode *orphan_fe; |
2178 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; | 2202 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; |
2203 | int namelen = dio ? | ||
2204 | (OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN) : | ||
2205 | OCFS2_ORPHAN_NAMELEN; | ||
2179 | 2206 | ||
2180 | trace_ocfs2_orphan_add_begin( | 2207 | trace_ocfs2_orphan_add_begin( |
2181 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 2208 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
@@ -2219,7 +2246,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
2219 | ocfs2_journal_dirty(handle, orphan_dir_bh); | 2246 | ocfs2_journal_dirty(handle, orphan_dir_bh); |
2220 | 2247 | ||
2221 | status = __ocfs2_add_entry(handle, orphan_dir_inode, name, | 2248 | status = __ocfs2_add_entry(handle, orphan_dir_inode, name, |
2222 | OCFS2_ORPHAN_NAMELEN, inode, | 2249 | namelen, inode, |
2223 | OCFS2_I(inode)->ip_blkno, | 2250 | OCFS2_I(inode)->ip_blkno, |
2224 | orphan_dir_bh, lookup); | 2251 | orphan_dir_bh, lookup); |
2225 | if (status < 0) { | 2252 | if (status < 0) { |
@@ -2227,13 +2254,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
2227 | goto rollback; | 2254 | goto rollback; |
2228 | } | 2255 | } |
2229 | 2256 | ||
2230 | fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL); | 2257 | if (dio) { |
2231 | OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR; | 2258 | /* Update flag OCFS2_DIO_ORPHANED_FL and record the orphan |
2259 | * slot. | ||
2260 | */ | ||
2261 | fe->i_flags |= cpu_to_le32(OCFS2_DIO_ORPHANED_FL); | ||
2262 | fe->i_dio_orphaned_slot = cpu_to_le16(osb->slot_num); | ||
2263 | } else { | ||
2264 | fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL); | ||
2265 | OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR; | ||
2232 | 2266 | ||
2233 | /* Record which orphan dir our inode now resides | 2267 | /* Record which orphan dir our inode now resides |
2234 | * in. delete_inode will use this to determine which orphan | 2268 | * in. delete_inode will use this to determine which orphan |
2235 | * dir to lock. */ | 2269 | * dir to lock. */ |
2236 | fe->i_orphaned_slot = cpu_to_le16(osb->slot_num); | 2270 | fe->i_orphaned_slot = cpu_to_le16(osb->slot_num); |
2271 | } | ||
2237 | 2272 | ||
2238 | ocfs2_journal_dirty(handle, fe_bh); | 2273 | ocfs2_journal_dirty(handle, fe_bh); |
2239 | 2274 | ||
@@ -2258,14 +2293,28 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
2258 | handle_t *handle, | 2293 | handle_t *handle, |
2259 | struct inode *orphan_dir_inode, | 2294 | struct inode *orphan_dir_inode, |
2260 | struct inode *inode, | 2295 | struct inode *inode, |
2261 | struct buffer_head *orphan_dir_bh) | 2296 | struct buffer_head *orphan_dir_bh, |
2297 | bool dio) | ||
2262 | { | 2298 | { |
2263 | char name[OCFS2_ORPHAN_NAMELEN + 1]; | 2299 | const int namelen = OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN; |
2300 | char name[namelen + 1]; | ||
2264 | struct ocfs2_dinode *orphan_fe; | 2301 | struct ocfs2_dinode *orphan_fe; |
2265 | int status = 0; | 2302 | int status = 0; |
2266 | struct ocfs2_dir_lookup_result lookup = { NULL, }; | 2303 | struct ocfs2_dir_lookup_result lookup = { NULL, }; |
2267 | 2304 | ||
2268 | status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name); | 2305 | if (dio) { |
2306 | status = snprintf(name, OCFS2_DIO_ORPHAN_PREFIX_LEN + 1, "%s", | ||
2307 | OCFS2_DIO_ORPHAN_PREFIX); | ||
2308 | if (status != OCFS2_DIO_ORPHAN_PREFIX_LEN) { | ||
2309 | status = -EINVAL; | ||
2310 | mlog_errno(status); | ||
2311 | return status; | ||
2312 | } | ||
2313 | |||
2314 | status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, | ||
2315 | name + OCFS2_DIO_ORPHAN_PREFIX_LEN); | ||
2316 | } else | ||
2317 | status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name); | ||
2269 | if (status < 0) { | 2318 | if (status < 0) { |
2270 | mlog_errno(status); | 2319 | mlog_errno(status); |
2271 | goto leave; | 2320 | goto leave; |
@@ -2273,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
2273 | 2322 | ||
2274 | trace_ocfs2_orphan_del( | 2323 | trace_ocfs2_orphan_del( |
2275 | (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, | 2324 | (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, |
2276 | name, OCFS2_ORPHAN_NAMELEN); | 2325 | name, namelen); |
2277 | 2326 | ||
2278 | /* find it's spot in the orphan directory */ | 2327 | /* find it's spot in the orphan directory */ |
2279 | status = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, orphan_dir_inode, | 2328 | status = ocfs2_find_entry(name, namelen, orphan_dir_inode, |
2280 | &lookup); | 2329 | &lookup); |
2281 | if (status) { | 2330 | if (status) { |
2282 | mlog_errno(status); | 2331 | mlog_errno(status); |
@@ -2376,7 +2425,8 @@ static int ocfs2_prep_new_orphaned_file(struct inode *dir, | |||
2376 | } | 2425 | } |
2377 | 2426 | ||
2378 | ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh, | 2427 | ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh, |
2379 | di_blkno, orphan_name, orphan_insert); | 2428 | di_blkno, orphan_name, orphan_insert, |
2429 | false); | ||
2380 | if (ret < 0) { | 2430 | if (ret < 0) { |
2381 | mlog_errno(ret); | 2431 | mlog_errno(ret); |
2382 | goto out; | 2432 | goto out; |
@@ -2482,7 +2532,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, | |||
2482 | 2532 | ||
2483 | di = (struct ocfs2_dinode *)new_di_bh->b_data; | 2533 | di = (struct ocfs2_dinode *)new_di_bh->b_data; |
2484 | status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name, | 2534 | status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name, |
2485 | &orphan_insert, orphan_dir); | 2535 | &orphan_insert, orphan_dir, false); |
2486 | if (status < 0) { | 2536 | if (status < 0) { |
2487 | mlog_errno(status); | 2537 | mlog_errno(status); |
2488 | goto leave; | 2538 | goto leave; |
@@ -2527,6 +2577,186 @@ leave: | |||
2527 | return status; | 2577 | return status; |
2528 | } | 2578 | } |
2529 | 2579 | ||
2580 | static int ocfs2_dio_orphan_recovered(struct inode *inode) | ||
2581 | { | ||
2582 | int ret; | ||
2583 | struct buffer_head *di_bh = NULL; | ||
2584 | struct ocfs2_dinode *di = NULL; | ||
2585 | |||
2586 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | ||
2587 | if (ret < 0) { | ||
2588 | mlog_errno(ret); | ||
2589 | return 0; | ||
2590 | } | ||
2591 | |||
2592 | di = (struct ocfs2_dinode *) di_bh->b_data; | ||
2593 | ret = !(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)); | ||
2594 | ocfs2_inode_unlock(inode, 1); | ||
2595 | brelse(di_bh); | ||
2596 | |||
2597 | return ret; | ||
2598 | } | ||
2599 | |||
2600 | #define OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL 10000 | ||
2601 | int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb, | ||
2602 | struct inode *inode) | ||
2603 | { | ||
2604 | char orphan_name[OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN + 1]; | ||
2605 | struct inode *orphan_dir_inode = NULL; | ||
2606 | struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; | ||
2607 | struct buffer_head *di_bh = NULL; | ||
2608 | int status = 0; | ||
2609 | handle_t *handle = NULL; | ||
2610 | struct ocfs2_dinode *di = NULL; | ||
2611 | |||
2612 | restart: | ||
2613 | status = ocfs2_inode_lock(inode, &di_bh, 1); | ||
2614 | if (status < 0) { | ||
2615 | mlog_errno(status); | ||
2616 | goto bail; | ||
2617 | } | ||
2618 | |||
2619 | di = (struct ocfs2_dinode *) di_bh->b_data; | ||
2620 | /* | ||
2621 | * Another append dio crashed? | ||
2622 | * If so, wait for recovery first. | ||
2623 | */ | ||
2624 | if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) { | ||
2625 | ocfs2_inode_unlock(inode, 1); | ||
2626 | brelse(di_bh); | ||
2627 | wait_event_interruptible_timeout(OCFS2_I(inode)->append_dio_wq, | ||
2628 | ocfs2_dio_orphan_recovered(inode), | ||
2629 | msecs_to_jiffies(OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL)); | ||
2630 | goto restart; | ||
2631 | } | ||
2632 | |||
2633 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir_inode, | ||
2634 | OCFS2_I(inode)->ip_blkno, | ||
2635 | orphan_name, | ||
2636 | &orphan_insert, | ||
2637 | true); | ||
2638 | if (status < 0) { | ||
2639 | mlog_errno(status); | ||
2640 | goto bail_unlock_inode; | ||
2641 | } | ||
2642 | |||
2643 | handle = ocfs2_start_trans(osb, | ||
2644 | OCFS2_INODE_ADD_TO_ORPHAN_CREDITS); | ||
2645 | if (IS_ERR(handle)) { | ||
2646 | status = PTR_ERR(handle); | ||
2647 | goto bail_unlock_orphan; | ||
2648 | } | ||
2649 | |||
2650 | status = ocfs2_orphan_add(osb, handle, inode, di_bh, orphan_name, | ||
2651 | &orphan_insert, orphan_dir_inode, true); | ||
2652 | if (status) | ||
2653 | mlog_errno(status); | ||
2654 | |||
2655 | ocfs2_commit_trans(osb, handle); | ||
2656 | |||
2657 | bail_unlock_orphan: | ||
2658 | ocfs2_inode_unlock(orphan_dir_inode, 1); | ||
2659 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
2660 | iput(orphan_dir_inode); | ||
2661 | |||
2662 | ocfs2_free_dir_lookup_result(&orphan_insert); | ||
2663 | |||
2664 | bail_unlock_inode: | ||
2665 | ocfs2_inode_unlock(inode, 1); | ||
2666 | brelse(di_bh); | ||
2667 | |||
2668 | bail: | ||
2669 | return status; | ||
2670 | } | ||
2671 | |||
2672 | int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb, | ||
2673 | struct inode *inode, int update_isize, | ||
2674 | loff_t end) | ||
2675 | { | ||
2676 | struct inode *orphan_dir_inode = NULL; | ||
2677 | struct buffer_head *orphan_dir_bh = NULL; | ||
2678 | struct buffer_head *di_bh = NULL; | ||
2679 | struct ocfs2_dinode *di = NULL; | ||
2680 | handle_t *handle = NULL; | ||
2681 | int status = 0; | ||
2682 | |||
2683 | status = ocfs2_inode_lock(inode, &di_bh, 1); | ||
2684 | if (status < 0) { | ||
2685 | mlog_errno(status); | ||
2686 | goto bail; | ||
2687 | } | ||
2688 | di = (struct ocfs2_dinode *) di_bh->b_data; | ||
2689 | |||
2690 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | ||
2691 | ORPHAN_DIR_SYSTEM_INODE, | ||
2692 | le16_to_cpu(di->i_dio_orphaned_slot)); | ||
2693 | if (!orphan_dir_inode) { | ||
2694 | status = -ENOENT; | ||
2695 | mlog_errno(status); | ||
2696 | goto bail_unlock_inode; | ||
2697 | } | ||
2698 | |||
2699 | mutex_lock(&orphan_dir_inode->i_mutex); | ||
2700 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); | ||
2701 | if (status < 0) { | ||
2702 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
2703 | iput(orphan_dir_inode); | ||
2704 | mlog_errno(status); | ||
2705 | goto bail_unlock_inode; | ||
2706 | } | ||
2707 | |||
2708 | handle = ocfs2_start_trans(osb, | ||
2709 | OCFS2_INODE_DEL_FROM_ORPHAN_CREDITS); | ||
2710 | if (IS_ERR(handle)) { | ||
2711 | status = PTR_ERR(handle); | ||
2712 | goto bail_unlock_orphan; | ||
2713 | } | ||
2714 | |||
2715 | BUG_ON(!(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))); | ||
2716 | |||
2717 | status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, | ||
2718 | inode, orphan_dir_bh, true); | ||
2719 | if (status < 0) { | ||
2720 | mlog_errno(status); | ||
2721 | goto bail_commit; | ||
2722 | } | ||
2723 | |||
2724 | status = ocfs2_journal_access_di(handle, | ||
2725 | INODE_CACHE(inode), | ||
2726 | di_bh, | ||
2727 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2728 | if (status < 0) { | ||
2729 | mlog_errno(status); | ||
2730 | goto bail_commit; | ||
2731 | } | ||
2732 | |||
2733 | di->i_flags &= ~cpu_to_le32(OCFS2_DIO_ORPHANED_FL); | ||
2734 | di->i_dio_orphaned_slot = 0; | ||
2735 | |||
2736 | if (update_isize) { | ||
2737 | status = ocfs2_set_inode_size(handle, inode, di_bh, end); | ||
2738 | if (status) | ||
2739 | mlog_errno(status); | ||
2740 | } else | ||
2741 | ocfs2_journal_dirty(handle, di_bh); | ||
2742 | |||
2743 | bail_commit: | ||
2744 | ocfs2_commit_trans(osb, handle); | ||
2745 | |||
2746 | bail_unlock_orphan: | ||
2747 | ocfs2_inode_unlock(orphan_dir_inode, 1); | ||
2748 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
2749 | brelse(orphan_dir_bh); | ||
2750 | iput(orphan_dir_inode); | ||
2751 | |||
2752 | bail_unlock_inode: | ||
2753 | ocfs2_inode_unlock(inode, 1); | ||
2754 | brelse(di_bh); | ||
2755 | |||
2756 | bail: | ||
2757 | return status; | ||
2758 | } | ||
2759 | |||
2530 | int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | 2760 | int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, |
2531 | struct inode *inode, | 2761 | struct inode *inode, |
2532 | struct dentry *dentry) | 2762 | struct dentry *dentry) |
@@ -2615,7 +2845,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | |||
2615 | } | 2845 | } |
2616 | 2846 | ||
2617 | status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, | 2847 | status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, |
2618 | orphan_dir_bh); | 2848 | orphan_dir_bh, false); |
2619 | if (status < 0) { | 2849 | if (status < 0) { |
2620 | mlog_errno(status); | 2850 | mlog_errno(status); |
2621 | goto out_commit; | 2851 | goto out_commit; |
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h index e5d059d4f115..5ddecce172fa 100644 --- a/fs/ocfs2/namei.h +++ b/fs/ocfs2/namei.h | |||
@@ -34,10 +34,16 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
34 | handle_t *handle, | 34 | handle_t *handle, |
35 | struct inode *orphan_dir_inode, | 35 | struct inode *orphan_dir_inode, |
36 | struct inode *inode, | 36 | struct inode *inode, |
37 | struct buffer_head *orphan_dir_bh); | 37 | struct buffer_head *orphan_dir_bh, |
38 | bool dio); | ||
38 | int ocfs2_create_inode_in_orphan(struct inode *dir, | 39 | int ocfs2_create_inode_in_orphan(struct inode *dir, |
39 | int mode, | 40 | int mode, |
40 | struct inode **new_inode); | 41 | struct inode **new_inode); |
42 | int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb, | ||
43 | struct inode *inode); | ||
44 | int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb, | ||
45 | struct inode *inode, int update_isize, | ||
46 | loff_t end); | ||
41 | int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | 47 | int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, |
42 | struct inode *new_inode, | 48 | struct inode *new_inode, |
43 | struct dentry *new_dentry); | 49 | struct dentry *new_dentry); |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 7d6b7d090452..8490c64d34fe 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -209,6 +209,11 @@ struct ocfs2_lock_res { | |||
209 | #endif | 209 | #endif |
210 | }; | 210 | }; |
211 | 211 | ||
212 | enum ocfs2_orphan_reco_type { | ||
213 | ORPHAN_NO_NEED_TRUNCATE = 0, | ||
214 | ORPHAN_NEED_TRUNCATE, | ||
215 | }; | ||
216 | |||
212 | enum ocfs2_orphan_scan_state { | 217 | enum ocfs2_orphan_scan_state { |
213 | ORPHAN_SCAN_ACTIVE, | 218 | ORPHAN_SCAN_ACTIVE, |
214 | ORPHAN_SCAN_INACTIVE | 219 | ORPHAN_SCAN_INACTIVE |
@@ -279,6 +284,8 @@ enum ocfs2_mount_options | |||
279 | writes */ | 284 | writes */ |
280 | OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */ | 285 | OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */ |
281 | OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ | 286 | OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ |
287 | |||
288 | OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */ | ||
282 | }; | 289 | }; |
283 | 290 | ||
284 | #define OCFS2_OSB_SOFT_RO 0x0001 | 291 | #define OCFS2_OSB_SOFT_RO 0x0001 |
@@ -493,6 +500,14 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb) | |||
493 | return 0; | 500 | return 0; |
494 | } | 501 | } |
495 | 502 | ||
503 | static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb) | ||
504 | { | ||
505 | if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_APPEND_DIO) | ||
506 | return 1; | ||
507 | return 0; | ||
508 | } | ||
509 | |||
510 | |||
496 | static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb) | 511 | static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb) |
497 | { | 512 | { |
498 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA) | 513 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA) |
@@ -724,6 +739,16 @@ static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb, | |||
724 | return clusters; | 739 | return clusters; |
725 | } | 740 | } |
726 | 741 | ||
742 | static inline unsigned int ocfs2_bytes_to_clusters(struct super_block *sb, | ||
743 | u64 bytes) | ||
744 | { | ||
745 | int cl_bits = OCFS2_SB(sb)->s_clustersize_bits; | ||
746 | unsigned int clusters; | ||
747 | |||
748 | clusters = (unsigned int)(bytes >> cl_bits); | ||
749 | return clusters; | ||
750 | } | ||
751 | |||
727 | static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb, | 752 | static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb, |
728 | u64 bytes) | 753 | u64 bytes) |
729 | { | 754 | { |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 938387a10d5d..20e37a3ed26f 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -105,7 +105,8 @@ | |||
105 | | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO) | 105 | | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO) |
106 | #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ | 106 | #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ |
107 | | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ | 107 | | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ |
108 | | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) | 108 | | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA \ |
109 | | OCFS2_FEATURE_RO_COMPAT_APPEND_DIO) | ||
109 | 110 | ||
110 | /* | 111 | /* |
111 | * Heartbeat-only devices are missing journals and other files. The | 112 | * Heartbeat-only devices are missing journals and other files. The |
@@ -199,6 +200,11 @@ | |||
199 | #define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002 | 200 | #define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002 |
200 | #define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004 | 201 | #define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004 |
201 | 202 | ||
203 | /* | ||
204 | * Append Direct IO support | ||
205 | */ | ||
206 | #define OCFS2_FEATURE_RO_COMPAT_APPEND_DIO 0x0008 | ||
207 | |||
202 | /* The byte offset of the first backup block will be 1G. | 208 | /* The byte offset of the first backup block will be 1G. |
203 | * The following will be 4G, 16G, 64G, 256G and 1T. | 209 | * The following will be 4G, 16G, 64G, 256G and 1T. |
204 | */ | 210 | */ |
@@ -229,6 +235,8 @@ | |||
229 | #define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ | 235 | #define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ |
230 | #define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ | 236 | #define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ |
231 | #define OCFS2_QUOTA_FL (0x00001000) /* Quota file */ | 237 | #define OCFS2_QUOTA_FL (0x00001000) /* Quota file */ |
238 | #define OCFS2_DIO_ORPHANED_FL (0X00002000) /* On the orphan list especially | ||
239 | * for dio */ | ||
232 | 240 | ||
233 | /* | 241 | /* |
234 | * Flags on ocfs2_dinode.i_dyn_features | 242 | * Flags on ocfs2_dinode.i_dyn_features |
@@ -729,7 +737,9 @@ struct ocfs2_dinode { | |||
729 | inode belongs to. Only valid | 737 | inode belongs to. Only valid |
730 | if allocated from a | 738 | if allocated from a |
731 | discontiguous block group */ | 739 | discontiguous block group */ |
732 | /*A0*/ __le64 i_reserved2[3]; | 740 | /*A0*/ __le16 i_dio_orphaned_slot; /* only used for append dio write */ |
741 | __le16 i_reserved1[3]; | ||
742 | __le64 i_reserved2[2]; | ||
733 | /*B8*/ union { | 743 | /*B8*/ union { |
734 | __le64 i_pad1; /* Generic way to refer to this | 744 | __le64 i_pad1; /* Generic way to refer to this |
735 | 64bit union */ | 745 | 64bit union */ |
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index 1eae330193a6..b6d51333ad02 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h | |||
@@ -48,6 +48,7 @@ struct ocfs2_quota_recovery { | |||
48 | /* In-memory structure with quota header information */ | 48 | /* In-memory structure with quota header information */ |
49 | struct ocfs2_mem_dqinfo { | 49 | struct ocfs2_mem_dqinfo { |
50 | unsigned int dqi_type; /* Quota type this structure describes */ | 50 | unsigned int dqi_type; /* Quota type this structure describes */ |
51 | unsigned int dqi_flags; /* Flags OLQF_* */ | ||
51 | unsigned int dqi_chunks; /* Number of chunks in local quota file */ | 52 | unsigned int dqi_chunks; /* Number of chunks in local quota file */ |
52 | unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ | 53 | unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ |
53 | unsigned int dqi_syncms; /* How often should we sync with other nodes */ | 54 | unsigned int dqi_syncms; /* How often should we sync with other nodes */ |
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 10b653930ee2..3d0b63d34225 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c | |||
@@ -73,12 +73,6 @@ static loff_t ol_dqblk_off(struct super_block *sb, int c, int off) | |||
73 | ol_dqblk_block_off(sb, c, off); | 73 | ol_dqblk_block_off(sb, c, off); |
74 | } | 74 | } |
75 | 75 | ||
76 | /* Compute block number from given offset */ | ||
77 | static inline unsigned int ol_dqblk_file_block(struct super_block *sb, loff_t off) | ||
78 | { | ||
79 | return off >> sb->s_blocksize_bits; | ||
80 | } | ||
81 | |||
82 | static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off) | 76 | static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off) |
83 | { | 77 | { |
84 | return off & ((1 << sb->s_blocksize_bits) - 1); | 78 | return off & ((1 << sb->s_blocksize_bits) - 1); |
@@ -292,7 +286,7 @@ static void olq_update_info(struct buffer_head *bh, void *private) | |||
292 | ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + | 286 | ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + |
293 | OCFS2_LOCAL_INFO_OFF); | 287 | OCFS2_LOCAL_INFO_OFF); |
294 | spin_lock(&dq_data_lock); | 288 | spin_lock(&dq_data_lock); |
295 | ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK); | 289 | ldinfo->dqi_flags = cpu_to_le32(oinfo->dqi_flags); |
296 | ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks); | 290 | ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks); |
297 | ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks); | 291 | ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks); |
298 | spin_unlock(&dq_data_lock); | 292 | spin_unlock(&dq_data_lock); |
@@ -701,8 +695,8 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) | |||
701 | /* We don't need the lock and we have to acquire quota file locks | 695 | /* We don't need the lock and we have to acquire quota file locks |
702 | * which will later depend on this lock */ | 696 | * which will later depend on this lock */ |
703 | mutex_unlock(&sb_dqopt(sb)->dqio_mutex); | 697 | mutex_unlock(&sb_dqopt(sb)->dqio_mutex); |
704 | info->dqi_maxblimit = 0x7fffffffffffffffLL; | 698 | info->dqi_max_spc_limit = 0x7fffffffffffffffLL; |
705 | info->dqi_maxilimit = 0x7fffffffffffffffLL; | 699 | info->dqi_max_ino_limit = 0x7fffffffffffffffLL; |
706 | oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS); | 700 | oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS); |
707 | if (!oinfo) { | 701 | if (!oinfo) { |
708 | mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota" | 702 | mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota" |
@@ -737,13 +731,13 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) | |||
737 | } | 731 | } |
738 | ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + | 732 | ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + |
739 | OCFS2_LOCAL_INFO_OFF); | 733 | OCFS2_LOCAL_INFO_OFF); |
740 | info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags); | 734 | oinfo->dqi_flags = le32_to_cpu(ldinfo->dqi_flags); |
741 | oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks); | 735 | oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks); |
742 | oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks); | 736 | oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks); |
743 | oinfo->dqi_libh = bh; | 737 | oinfo->dqi_libh = bh; |
744 | 738 | ||
745 | /* We crashed when using local quota file? */ | 739 | /* We crashed when using local quota file? */ |
746 | if (!(info->dqi_flags & OLQF_CLEAN)) { | 740 | if (!(oinfo->dqi_flags & OLQF_CLEAN)) { |
747 | rec = OCFS2_SB(sb)->quota_rec; | 741 | rec = OCFS2_SB(sb)->quota_rec; |
748 | if (!rec) { | 742 | if (!rec) { |
749 | rec = ocfs2_alloc_quota_recovery(); | 743 | rec = ocfs2_alloc_quota_recovery(); |
@@ -772,7 +766,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) | |||
772 | } | 766 | } |
773 | 767 | ||
774 | /* Now mark quota file as used */ | 768 | /* Now mark quota file as used */ |
775 | info->dqi_flags &= ~OLQF_CLEAN; | 769 | oinfo->dqi_flags &= ~OLQF_CLEAN; |
776 | status = ocfs2_modify_bh(lqinode, bh, olq_update_info, info); | 770 | status = ocfs2_modify_bh(lqinode, bh, olq_update_info, info); |
777 | if (status < 0) { | 771 | if (status < 0) { |
778 | mlog_errno(status); | 772 | mlog_errno(status); |
@@ -857,7 +851,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) | |||
857 | goto out; | 851 | goto out; |
858 | 852 | ||
859 | /* Mark local file as clean */ | 853 | /* Mark local file as clean */ |
860 | info->dqi_flags |= OLQF_CLEAN; | 854 | oinfo->dqi_flags |= OLQF_CLEAN; |
861 | status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], | 855 | status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], |
862 | oinfo->dqi_libh, | 856 | oinfo->dqi_libh, |
863 | olq_update_info, | 857 | olq_update_info, |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index d81f6e2a97f5..ee541f92dab4 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -2428,8 +2428,6 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, | |||
2428 | get_bh(prev_bh); | 2428 | get_bh(prev_bh); |
2429 | } | 2429 | } |
2430 | 2430 | ||
2431 | rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; | ||
2432 | |||
2433 | trace_ocfs2_calc_refcount_meta_credits_iterate( | 2431 | trace_ocfs2_calc_refcount_meta_credits_iterate( |
2434 | recs_add, (unsigned long long)cpos, clusters, | 2432 | recs_add, (unsigned long long)cpos, clusters, |
2435 | (unsigned long long)le64_to_cpu(rec.r_cpos), | 2433 | (unsigned long long)le64_to_cpu(rec.r_cpos), |
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index 41ffd36c689c..6a348b0294ab 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c | |||
@@ -39,7 +39,7 @@ | |||
39 | #define OCFS2_CHECK_RESERVATIONS | 39 | #define OCFS2_CHECK_RESERVATIONS |
40 | #endif | 40 | #endif |
41 | 41 | ||
42 | DEFINE_SPINLOCK(resv_lock); | 42 | static DEFINE_SPINLOCK(resv_lock); |
43 | 43 | ||
44 | #define OCFS2_MIN_RESV_WINDOW_BITS 8 | 44 | #define OCFS2_MIN_RESV_WINDOW_BITS 8 |
45 | #define OCFS2_MAX_RESV_WINDOW_BITS 1024 | 45 | #define OCFS2_MAX_RESV_WINDOW_BITS 1024 |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 83723179e1ec..26675185b886 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -191,6 +191,7 @@ enum { | |||
191 | Opt_coherency_full, | 191 | Opt_coherency_full, |
192 | Opt_resv_level, | 192 | Opt_resv_level, |
193 | Opt_dir_resv_level, | 193 | Opt_dir_resv_level, |
194 | Opt_journal_async_commit, | ||
194 | Opt_err, | 195 | Opt_err, |
195 | }; | 196 | }; |
196 | 197 | ||
@@ -222,6 +223,7 @@ static const match_table_t tokens = { | |||
222 | {Opt_coherency_full, "coherency=full"}, | 223 | {Opt_coherency_full, "coherency=full"}, |
223 | {Opt_resv_level, "resv_level=%u"}, | 224 | {Opt_resv_level, "resv_level=%u"}, |
224 | {Opt_dir_resv_level, "dir_resv_level=%u"}, | 225 | {Opt_dir_resv_level, "dir_resv_level=%u"}, |
226 | {Opt_journal_async_commit, "journal_async_commit"}, | ||
225 | {Opt_err, NULL} | 227 | {Opt_err, NULL} |
226 | }; | 228 | }; |
227 | 229 | ||
@@ -1000,36 +1002,6 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) | |||
1000 | } | 1002 | } |
1001 | } | 1003 | } |
1002 | 1004 | ||
1003 | /* Handle quota on quotactl */ | ||
1004 | static int ocfs2_quota_on(struct super_block *sb, int type, int format_id) | ||
1005 | { | ||
1006 | unsigned int feature[OCFS2_MAXQUOTAS] = { | ||
1007 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA, | ||
1008 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; | ||
1009 | |||
1010 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) | ||
1011 | return -EINVAL; | ||
1012 | |||
1013 | return dquot_enable(sb_dqopt(sb)->files[type], type, | ||
1014 | format_id, DQUOT_LIMITS_ENABLED); | ||
1015 | } | ||
1016 | |||
1017 | /* Handle quota off quotactl */ | ||
1018 | static int ocfs2_quota_off(struct super_block *sb, int type) | ||
1019 | { | ||
1020 | return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); | ||
1021 | } | ||
1022 | |||
1023 | static const struct quotactl_ops ocfs2_quotactl_ops = { | ||
1024 | .quota_on_meta = ocfs2_quota_on, | ||
1025 | .quota_off = ocfs2_quota_off, | ||
1026 | .quota_sync = dquot_quota_sync, | ||
1027 | .get_info = dquot_get_dqinfo, | ||
1028 | .set_info = dquot_set_dqinfo, | ||
1029 | .get_dqblk = dquot_get_dqblk, | ||
1030 | .set_dqblk = dquot_set_dqblk, | ||
1031 | }; | ||
1032 | |||
1033 | static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | 1005 | static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) |
1034 | { | 1006 | { |
1035 | struct dentry *root; | 1007 | struct dentry *root; |
@@ -1500,6 +1472,9 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1500 | option < OCFS2_MAX_RESV_LEVEL) | 1472 | option < OCFS2_MAX_RESV_LEVEL) |
1501 | mopt->dir_resv_level = option; | 1473 | mopt->dir_resv_level = option; |
1502 | break; | 1474 | break; |
1475 | case Opt_journal_async_commit: | ||
1476 | mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; | ||
1477 | break; | ||
1503 | default: | 1478 | default: |
1504 | mlog(ML_ERROR, | 1479 | mlog(ML_ERROR, |
1505 | "Unrecognized mount option \"%s\" " | 1480 | "Unrecognized mount option \"%s\" " |
@@ -1606,6 +1581,9 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) | |||
1606 | if (osb->osb_dir_resv_level != osb->osb_resv_level) | 1581 | if (osb->osb_dir_resv_level != osb->osb_resv_level) |
1607 | seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level); | 1582 | seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level); |
1608 | 1583 | ||
1584 | if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) | ||
1585 | seq_printf(s, ",journal_async_commit"); | ||
1586 | |||
1609 | return 0; | 1587 | return 0; |
1610 | } | 1588 | } |
1611 | 1589 | ||
@@ -1768,6 +1746,8 @@ static void ocfs2_inode_init_once(void *data) | |||
1768 | ocfs2_lock_res_init_once(&oi->ip_inode_lockres); | 1746 | ocfs2_lock_res_init_once(&oi->ip_inode_lockres); |
1769 | ocfs2_lock_res_init_once(&oi->ip_open_lockres); | 1747 | ocfs2_lock_res_init_once(&oi->ip_open_lockres); |
1770 | 1748 | ||
1749 | init_waitqueue_head(&oi->append_dio_wq); | ||
1750 | |||
1771 | ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode), | 1751 | ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode), |
1772 | &ocfs2_inode_caching_ops); | 1752 | &ocfs2_inode_caching_ops); |
1773 | 1753 | ||
@@ -2079,7 +2059,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2079 | sb->s_op = &ocfs2_sops; | 2059 | sb->s_op = &ocfs2_sops; |
2080 | sb->s_d_op = &ocfs2_dentry_ops; | 2060 | sb->s_d_op = &ocfs2_dentry_ops; |
2081 | sb->s_export_op = &ocfs2_export_ops; | 2061 | sb->s_export_op = &ocfs2_export_ops; |
2082 | sb->s_qcop = &ocfs2_quotactl_ops; | 2062 | sb->s_qcop = &dquot_quotactl_sysfile_ops; |
2083 | sb->dq_op = &ocfs2_quota_operations; | 2063 | sb->dq_op = &ocfs2_quota_operations; |
2084 | sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; | 2064 | sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; |
2085 | sb->s_xattr = ocfs2_xattr_handlers; | 2065 | sb->s_xattr = ocfs2_xattr_handlers; |
@@ -2475,6 +2455,15 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) | |||
2475 | goto finally; | 2455 | goto finally; |
2476 | } | 2456 | } |
2477 | 2457 | ||
2458 | if (osb->s_mount_opt & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) | ||
2459 | jbd2_journal_set_features(osb->journal->j_journal, | ||
2460 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | ||
2461 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
2462 | else | ||
2463 | jbd2_journal_clear_features(osb->journal->j_journal, | ||
2464 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | ||
2465 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
2466 | |||
2478 | if (dirty) { | 2467 | if (dirty) { |
2479 | /* recover my local alloc if we didn't unmount cleanly. */ | 2468 | /* recover my local alloc if we didn't unmount cleanly. */ |
2480 | status = ocfs2_begin_local_alloc_recovery(osb, | 2469 | status = ocfs2_begin_local_alloc_recovery(osb, |
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 662f8dee149f..85b190dc132f 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -5334,16 +5334,6 @@ out: | |||
5334 | return ret; | 5334 | return ret; |
5335 | } | 5335 | } |
5336 | 5336 | ||
5337 | static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode, | ||
5338 | struct ocfs2_xattr_bucket *bucket, | ||
5339 | int offs) | ||
5340 | { | ||
5341 | int block_off = offs >> inode->i_sb->s_blocksize_bits; | ||
5342 | |||
5343 | offs = offs % inode->i_sb->s_blocksize; | ||
5344 | return bucket_block(bucket, block_off) + offs; | ||
5345 | } | ||
5346 | |||
5347 | /* | 5337 | /* |
5348 | * Truncate the specified xe_off entry in xattr bucket. | 5338 | * Truncate the specified xe_off entry in xattr bucket. |
5349 | * bucket is indicated by header_bh and len is the new length. | 5339 | * bucket is indicated by header_bh and len is the new length. |
@@ -667,11 +667,8 @@ int open_check_o_direct(struct file *f) | |||
667 | { | 667 | { |
668 | /* NB: we're sure to have correct a_ops only after f_op->open */ | 668 | /* NB: we're sure to have correct a_ops only after f_op->open */ |
669 | if (f->f_flags & O_DIRECT) { | 669 | if (f->f_flags & O_DIRECT) { |
670 | if (!f->f_mapping->a_ops || | 670 | if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) |
671 | ((!f->f_mapping->a_ops->direct_IO) && | ||
672 | (!f->f_mapping->a_ops->get_xip_mem))) { | ||
673 | return -EINVAL; | 671 | return -EINVAL; |
674 | } | ||
675 | } | 672 | } |
676 | return 0; | 673 | return 0; |
677 | } | 674 | } |
@@ -971,8 +968,14 @@ struct file *file_open_name(struct filename *name, int flags, umode_t mode) | |||
971 | */ | 968 | */ |
972 | struct file *filp_open(const char *filename, int flags, umode_t mode) | 969 | struct file *filp_open(const char *filename, int flags, umode_t mode) |
973 | { | 970 | { |
974 | struct filename name = {.name = filename}; | 971 | struct filename *name = getname_kernel(filename); |
975 | return file_open_name(&name, flags, mode); | 972 | struct file *file = ERR_CAST(name); |
973 | |||
974 | if (!IS_ERR(name)) { | ||
975 | file = file_open_name(name, flags, mode); | ||
976 | putname(name); | ||
977 | } | ||
978 | return file; | ||
976 | } | 979 | } |
977 | EXPORT_SYMBOL(filp_open); | 980 | EXPORT_SYMBOL(filp_open); |
978 | 981 | ||
diff --git a/fs/proc/array.c b/fs/proc/array.c index bd117d065b82..1295a00ca316 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -81,6 +81,7 @@ | |||
81 | #include <linux/pid_namespace.h> | 81 | #include <linux/pid_namespace.h> |
82 | #include <linux/ptrace.h> | 82 | #include <linux/ptrace.h> |
83 | #include <linux/tracehook.h> | 83 | #include <linux/tracehook.h> |
84 | #include <linux/string_helpers.h> | ||
84 | #include <linux/user_namespace.h> | 85 | #include <linux/user_namespace.h> |
85 | 86 | ||
86 | #include <asm/pgtable.h> | 87 | #include <asm/pgtable.h> |
@@ -89,39 +90,18 @@ | |||
89 | 90 | ||
90 | static inline void task_name(struct seq_file *m, struct task_struct *p) | 91 | static inline void task_name(struct seq_file *m, struct task_struct *p) |
91 | { | 92 | { |
92 | int i; | 93 | char *buf; |
93 | char *buf, *end; | ||
94 | char *name; | ||
95 | char tcomm[sizeof(p->comm)]; | 94 | char tcomm[sizeof(p->comm)]; |
96 | 95 | ||
97 | get_task_comm(tcomm, p); | 96 | get_task_comm(tcomm, p); |
98 | 97 | ||
99 | seq_puts(m, "Name:\t"); | 98 | seq_puts(m, "Name:\t"); |
100 | end = m->buf + m->size; | ||
101 | buf = m->buf + m->count; | 99 | buf = m->buf + m->count; |
102 | name = tcomm; | 100 | |
103 | i = sizeof(tcomm); | 101 | /* Ignore error for now */ |
104 | while (i && (buf < end)) { | 102 | string_escape_str(tcomm, &buf, m->size - m->count, |
105 | unsigned char c = *name; | 103 | ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\"); |
106 | name++; | 104 | |
107 | i--; | ||
108 | *buf = c; | ||
109 | if (!c) | ||
110 | break; | ||
111 | if (c == '\\') { | ||
112 | buf++; | ||
113 | if (buf < end) | ||
114 | *buf++ = c; | ||
115 | continue; | ||
116 | } | ||
117 | if (c == '\n') { | ||
118 | *buf++ = '\\'; | ||
119 | if (buf < end) | ||
120 | *buf++ = 'n'; | ||
121 | continue; | ||
122 | } | ||
123 | buf++; | ||
124 | } | ||
125 | m->count = buf - m->buf; | 105 | m->count = buf - m->buf; |
126 | seq_putc(m, '\n'); | 106 | seq_putc(m, '\n'); |
127 | } | 107 | } |
@@ -336,12 +316,10 @@ static inline void task_context_switch_counts(struct seq_file *m, | |||
336 | 316 | ||
337 | static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) | 317 | static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) |
338 | { | 318 | { |
339 | seq_puts(m, "Cpus_allowed:\t"); | 319 | seq_printf(m, "Cpus_allowed:\t%*pb\n", |
340 | seq_cpumask(m, &task->cpus_allowed); | 320 | cpumask_pr_args(&task->cpus_allowed)); |
341 | seq_putc(m, '\n'); | 321 | seq_printf(m, "Cpus_allowed_list:\t%*pbl\n", |
342 | seq_puts(m, "Cpus_allowed_list:\t"); | 322 | cpumask_pr_args(&task->cpus_allowed)); |
343 | seq_cpumask_list(m, &task->cpus_allowed); | ||
344 | seq_putc(m, '\n'); | ||
345 | } | 323 | } |
346 | 324 | ||
347 | int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | 325 | int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 7fea13229f33..3309f59d421b 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -122,7 +122,7 @@ static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
122 | struct kstat *stat) | 122 | struct kstat *stat) |
123 | { | 123 | { |
124 | struct inode *inode = dentry->d_inode; | 124 | struct inode *inode = dentry->d_inode; |
125 | struct proc_dir_entry *de = PROC_I(inode)->pde; | 125 | struct proc_dir_entry *de = PDE(inode); |
126 | if (de && de->nlink) | 126 | if (de && de->nlink) |
127 | set_nlink(inode, de->nlink); | 127 | set_nlink(inode, de->nlink); |
128 | 128 | ||
@@ -350,29 +350,12 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
350 | if (ret) | 350 | if (ret) |
351 | return ret; | 351 | return ret; |
352 | 352 | ||
353 | if (S_ISDIR(dp->mode)) { | ||
354 | dp->proc_fops = &proc_dir_operations; | ||
355 | dp->proc_iops = &proc_dir_inode_operations; | ||
356 | dir->nlink++; | ||
357 | } else if (S_ISLNK(dp->mode)) { | ||
358 | dp->proc_iops = &proc_link_inode_operations; | ||
359 | } else if (S_ISREG(dp->mode)) { | ||
360 | BUG_ON(dp->proc_fops == NULL); | ||
361 | dp->proc_iops = &proc_file_inode_operations; | ||
362 | } else { | ||
363 | WARN_ON(1); | ||
364 | proc_free_inum(dp->low_ino); | ||
365 | return -EINVAL; | ||
366 | } | ||
367 | |||
368 | spin_lock(&proc_subdir_lock); | 353 | spin_lock(&proc_subdir_lock); |
369 | dp->parent = dir; | 354 | dp->parent = dir; |
370 | if (pde_subdir_insert(dir, dp) == false) { | 355 | if (pde_subdir_insert(dir, dp) == false) { |
371 | WARN(1, "proc_dir_entry '%s/%s' already registered\n", | 356 | WARN(1, "proc_dir_entry '%s/%s' already registered\n", |
372 | dir->name, dp->name); | 357 | dir->name, dp->name); |
373 | spin_unlock(&proc_subdir_lock); | 358 | spin_unlock(&proc_subdir_lock); |
374 | if (S_ISDIR(dp->mode)) | ||
375 | dir->nlink--; | ||
376 | proc_free_inum(dp->low_ino); | 359 | proc_free_inum(dp->low_ino); |
377 | return -EEXIST; | 360 | return -EEXIST; |
378 | } | 361 | } |
@@ -431,6 +414,7 @@ struct proc_dir_entry *proc_symlink(const char *name, | |||
431 | ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); | 414 | ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); |
432 | if (ent->data) { | 415 | if (ent->data) { |
433 | strcpy((char*)ent->data,dest); | 416 | strcpy((char*)ent->data,dest); |
417 | ent->proc_iops = &proc_link_inode_operations; | ||
434 | if (proc_register(parent, ent) < 0) { | 418 | if (proc_register(parent, ent) < 0) { |
435 | kfree(ent->data); | 419 | kfree(ent->data); |
436 | kfree(ent); | 420 | kfree(ent); |
@@ -456,8 +440,12 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, | |||
456 | ent = __proc_create(&parent, name, S_IFDIR | mode, 2); | 440 | ent = __proc_create(&parent, name, S_IFDIR | mode, 2); |
457 | if (ent) { | 441 | if (ent) { |
458 | ent->data = data; | 442 | ent->data = data; |
443 | ent->proc_fops = &proc_dir_operations; | ||
444 | ent->proc_iops = &proc_dir_inode_operations; | ||
445 | parent->nlink++; | ||
459 | if (proc_register(parent, ent) < 0) { | 446 | if (proc_register(parent, ent) < 0) { |
460 | kfree(ent); | 447 | kfree(ent); |
448 | parent->nlink--; | ||
461 | ent = NULL; | 449 | ent = NULL; |
462 | } | 450 | } |
463 | } | 451 | } |
@@ -493,6 +481,8 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, | |||
493 | return NULL; | 481 | return NULL; |
494 | } | 482 | } |
495 | 483 | ||
484 | BUG_ON(proc_fops == NULL); | ||
485 | |||
496 | if ((mode & S_IALLUGO) == 0) | 486 | if ((mode & S_IALLUGO) == 0) |
497 | mode |= S_IRUGO; | 487 | mode |= S_IRUGO; |
498 | pde = __proc_create(&parent, name, mode, 1); | 488 | pde = __proc_create(&parent, name, mode, 1); |
@@ -500,6 +490,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, | |||
500 | goto out; | 490 | goto out; |
501 | pde->proc_fops = proc_fops; | 491 | pde->proc_fops = proc_fops; |
502 | pde->data = data; | 492 | pde->data = data; |
493 | pde->proc_iops = &proc_file_inode_operations; | ||
503 | if (proc_register(parent, pde) < 0) | 494 | if (proc_register(parent, pde) < 0) |
504 | goto out_free; | 495 | goto out_free; |
505 | return pde; | 496 | return pde; |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 8420a2f80811..13a50a32652d 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -40,7 +40,7 @@ static void proc_evict_inode(struct inode *inode) | |||
40 | put_pid(PROC_I(inode)->pid); | 40 | put_pid(PROC_I(inode)->pid); |
41 | 41 | ||
42 | /* Let go of any associated proc directory entry */ | 42 | /* Let go of any associated proc directory entry */ |
43 | de = PROC_I(inode)->pde; | 43 | de = PDE(inode); |
44 | if (de) | 44 | if (de) |
45 | pde_put(de); | 45 | pde_put(de); |
46 | head = PROC_I(inode)->sysctl; | 46 | head = PROC_I(inode)->sysctl; |
diff --git a/fs/proc/page.c b/fs/proc/page.c index 1e3187da1fed..7eee2d8b97d9 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/ksm.h> | 5 | #include <linux/ksm.h> |
6 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
7 | #include <linux/mmzone.h> | 7 | #include <linux/mmzone.h> |
8 | #include <linux/huge_mm.h> | ||
8 | #include <linux/proc_fs.h> | 9 | #include <linux/proc_fs.h> |
9 | #include <linux/seq_file.h> | 10 | #include <linux/seq_file.h> |
10 | #include <linux/hugetlb.h> | 11 | #include <linux/hugetlb.h> |
@@ -121,9 +122,18 @@ u64 stable_page_flags(struct page *page) | |||
121 | * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon | 122 | * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon |
122 | * to make sure a given page is a thp, not a non-huge compound page. | 123 | * to make sure a given page is a thp, not a non-huge compound page. |
123 | */ | 124 | */ |
124 | else if (PageTransCompound(page) && (PageLRU(compound_head(page)) || | 125 | else if (PageTransCompound(page)) { |
125 | PageAnon(compound_head(page)))) | 126 | struct page *head = compound_head(page); |
126 | u |= 1 << KPF_THP; | 127 | |
128 | if (PageLRU(head) || PageAnon(head)) | ||
129 | u |= 1 << KPF_THP; | ||
130 | else if (is_huge_zero_page(head)) { | ||
131 | u |= 1 << KPF_ZERO_PAGE; | ||
132 | u |= 1 << KPF_THP; | ||
133 | } | ||
134 | } else if (is_zero_pfn(page_to_pfn(page))) | ||
135 | u |= 1 << KPF_ZERO_PAGE; | ||
136 | |||
127 | 137 | ||
128 | /* | 138 | /* |
129 | * Caveats on high order pages: page->_count will only be set | 139 | * Caveats on high order pages: page->_count will only be set |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 246eae84b13b..956b75d61809 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -21,7 +21,7 @@ | |||
21 | 21 | ||
22 | void task_mem(struct seq_file *m, struct mm_struct *mm) | 22 | void task_mem(struct seq_file *m, struct mm_struct *mm) |
23 | { | 23 | { |
24 | unsigned long data, text, lib, swap; | 24 | unsigned long data, text, lib, swap, ptes, pmds; |
25 | unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; | 25 | unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; |
26 | 26 | ||
27 | /* | 27 | /* |
@@ -42,6 +42,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
42 | text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; | 42 | text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; |
43 | lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; | 43 | lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; |
44 | swap = get_mm_counter(mm, MM_SWAPENTS); | 44 | swap = get_mm_counter(mm, MM_SWAPENTS); |
45 | ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes); | ||
46 | pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm); | ||
45 | seq_printf(m, | 47 | seq_printf(m, |
46 | "VmPeak:\t%8lu kB\n" | 48 | "VmPeak:\t%8lu kB\n" |
47 | "VmSize:\t%8lu kB\n" | 49 | "VmSize:\t%8lu kB\n" |
@@ -54,6 +56,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
54 | "VmExe:\t%8lu kB\n" | 56 | "VmExe:\t%8lu kB\n" |
55 | "VmLib:\t%8lu kB\n" | 57 | "VmLib:\t%8lu kB\n" |
56 | "VmPTE:\t%8lu kB\n" | 58 | "VmPTE:\t%8lu kB\n" |
59 | "VmPMD:\t%8lu kB\n" | ||
57 | "VmSwap:\t%8lu kB\n", | 60 | "VmSwap:\t%8lu kB\n", |
58 | hiwater_vm << (PAGE_SHIFT-10), | 61 | hiwater_vm << (PAGE_SHIFT-10), |
59 | total_vm << (PAGE_SHIFT-10), | 62 | total_vm << (PAGE_SHIFT-10), |
@@ -63,8 +66,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
63 | total_rss << (PAGE_SHIFT-10), | 66 | total_rss << (PAGE_SHIFT-10), |
64 | data << (PAGE_SHIFT-10), | 67 | data << (PAGE_SHIFT-10), |
65 | mm->stack_vm << (PAGE_SHIFT-10), text, lib, | 68 | mm->stack_vm << (PAGE_SHIFT-10), text, lib, |
66 | (PTRS_PER_PTE * sizeof(pte_t) * | 69 | ptes >> 10, |
67 | atomic_long_read(&mm->nr_ptes)) >> 10, | 70 | pmds >> 10, |
68 | swap << (PAGE_SHIFT-10)); | 71 | swap << (PAGE_SHIFT-10)); |
69 | } | 72 | } |
70 | 73 | ||
@@ -433,7 +436,6 @@ const struct file_operations proc_tid_maps_operations = { | |||
433 | 436 | ||
434 | #ifdef CONFIG_PROC_PAGE_MONITOR | 437 | #ifdef CONFIG_PROC_PAGE_MONITOR |
435 | struct mem_size_stats { | 438 | struct mem_size_stats { |
436 | struct vm_area_struct *vma; | ||
437 | unsigned long resident; | 439 | unsigned long resident; |
438 | unsigned long shared_clean; | 440 | unsigned long shared_clean; |
439 | unsigned long shared_dirty; | 441 | unsigned long shared_dirty; |
@@ -443,7 +445,6 @@ struct mem_size_stats { | |||
443 | unsigned long anonymous; | 445 | unsigned long anonymous; |
444 | unsigned long anonymous_thp; | 446 | unsigned long anonymous_thp; |
445 | unsigned long swap; | 447 | unsigned long swap; |
446 | unsigned long nonlinear; | ||
447 | u64 pss; | 448 | u64 pss; |
448 | }; | 449 | }; |
449 | 450 | ||
@@ -483,8 +484,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, | |||
483 | struct mm_walk *walk) | 484 | struct mm_walk *walk) |
484 | { | 485 | { |
485 | struct mem_size_stats *mss = walk->private; | 486 | struct mem_size_stats *mss = walk->private; |
486 | struct vm_area_struct *vma = mss->vma; | 487 | struct vm_area_struct *vma = walk->vma; |
487 | pgoff_t pgoff = linear_page_index(vma, addr); | ||
488 | struct page *page = NULL; | 488 | struct page *page = NULL; |
489 | 489 | ||
490 | if (pte_present(*pte)) { | 490 | if (pte_present(*pte)) { |
@@ -496,17 +496,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, | |||
496 | mss->swap += PAGE_SIZE; | 496 | mss->swap += PAGE_SIZE; |
497 | else if (is_migration_entry(swpent)) | 497 | else if (is_migration_entry(swpent)) |
498 | page = migration_entry_to_page(swpent); | 498 | page = migration_entry_to_page(swpent); |
499 | } else if (pte_file(*pte)) { | ||
500 | if (pte_to_pgoff(*pte) != pgoff) | ||
501 | mss->nonlinear += PAGE_SIZE; | ||
502 | } | 499 | } |
503 | 500 | ||
504 | if (!page) | 501 | if (!page) |
505 | return; | 502 | return; |
506 | |||
507 | if (page->index != pgoff) | ||
508 | mss->nonlinear += PAGE_SIZE; | ||
509 | |||
510 | smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte)); | 503 | smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte)); |
511 | } | 504 | } |
512 | 505 | ||
@@ -515,7 +508,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, | |||
515 | struct mm_walk *walk) | 508 | struct mm_walk *walk) |
516 | { | 509 | { |
517 | struct mem_size_stats *mss = walk->private; | 510 | struct mem_size_stats *mss = walk->private; |
518 | struct vm_area_struct *vma = mss->vma; | 511 | struct vm_area_struct *vma = walk->vma; |
519 | struct page *page; | 512 | struct page *page; |
520 | 513 | ||
521 | /* FOLL_DUMP will return -EFAULT on huge zero page */ | 514 | /* FOLL_DUMP will return -EFAULT on huge zero page */ |
@@ -536,8 +529,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, | |||
536 | static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | 529 | static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
537 | struct mm_walk *walk) | 530 | struct mm_walk *walk) |
538 | { | 531 | { |
539 | struct mem_size_stats *mss = walk->private; | 532 | struct vm_area_struct *vma = walk->vma; |
540 | struct vm_area_struct *vma = mss->vma; | ||
541 | pte_t *pte; | 533 | pte_t *pte; |
542 | spinlock_t *ptl; | 534 | spinlock_t *ptl; |
543 | 535 | ||
@@ -596,7 +588,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) | |||
596 | [ilog2(VM_ACCOUNT)] = "ac", | 588 | [ilog2(VM_ACCOUNT)] = "ac", |
597 | [ilog2(VM_NORESERVE)] = "nr", | 589 | [ilog2(VM_NORESERVE)] = "nr", |
598 | [ilog2(VM_HUGETLB)] = "ht", | 590 | [ilog2(VM_HUGETLB)] = "ht", |
599 | [ilog2(VM_NONLINEAR)] = "nl", | ||
600 | [ilog2(VM_ARCH_1)] = "ar", | 591 | [ilog2(VM_ARCH_1)] = "ar", |
601 | [ilog2(VM_DONTDUMP)] = "dd", | 592 | [ilog2(VM_DONTDUMP)] = "dd", |
602 | #ifdef CONFIG_MEM_SOFT_DIRTY | 593 | #ifdef CONFIG_MEM_SOFT_DIRTY |
@@ -630,10 +621,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) | |||
630 | }; | 621 | }; |
631 | 622 | ||
632 | memset(&mss, 0, sizeof mss); | 623 | memset(&mss, 0, sizeof mss); |
633 | mss.vma = vma; | ||
634 | /* mmap_sem is held in m_start */ | 624 | /* mmap_sem is held in m_start */ |
635 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) | 625 | walk_page_vma(vma, &smaps_walk); |
636 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); | ||
637 | 626 | ||
638 | show_map_vma(m, vma, is_pid); | 627 | show_map_vma(m, vma, is_pid); |
639 | 628 | ||
@@ -668,10 +657,6 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) | |||
668 | (vma->vm_flags & VM_LOCKED) ? | 657 | (vma->vm_flags & VM_LOCKED) ? |
669 | (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); | 658 | (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); |
670 | 659 | ||
671 | if (vma->vm_flags & VM_NONLINEAR) | ||
672 | seq_printf(m, "Nonlinear: %8lu kB\n", | ||
673 | mss.nonlinear >> 10); | ||
674 | |||
675 | show_smap_vma_flags(m, vma); | 660 | show_smap_vma_flags(m, vma); |
676 | m_cache_vma(m, vma); | 661 | m_cache_vma(m, vma); |
677 | return 0; | 662 | return 0; |
@@ -747,18 +732,18 @@ enum clear_refs_types { | |||
747 | CLEAR_REFS_ANON, | 732 | CLEAR_REFS_ANON, |
748 | CLEAR_REFS_MAPPED, | 733 | CLEAR_REFS_MAPPED, |
749 | CLEAR_REFS_SOFT_DIRTY, | 734 | CLEAR_REFS_SOFT_DIRTY, |
735 | CLEAR_REFS_MM_HIWATER_RSS, | ||
750 | CLEAR_REFS_LAST, | 736 | CLEAR_REFS_LAST, |
751 | }; | 737 | }; |
752 | 738 | ||
753 | struct clear_refs_private { | 739 | struct clear_refs_private { |
754 | struct vm_area_struct *vma; | ||
755 | enum clear_refs_types type; | 740 | enum clear_refs_types type; |
756 | }; | 741 | }; |
757 | 742 | ||
743 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
758 | static inline void clear_soft_dirty(struct vm_area_struct *vma, | 744 | static inline void clear_soft_dirty(struct vm_area_struct *vma, |
759 | unsigned long addr, pte_t *pte) | 745 | unsigned long addr, pte_t *pte) |
760 | { | 746 | { |
761 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
762 | /* | 747 | /* |
763 | * The soft-dirty tracker uses #PF-s to catch writes | 748 | * The soft-dirty tracker uses #PF-s to catch writes |
764 | * to pages, so write-protect the pte as well. See the | 749 | * to pages, so write-protect the pte as well. See the |
@@ -772,24 +757,63 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, | |||
772 | ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); | 757 | ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); |
773 | } else if (is_swap_pte(ptent)) { | 758 | } else if (is_swap_pte(ptent)) { |
774 | ptent = pte_swp_clear_soft_dirty(ptent); | 759 | ptent = pte_swp_clear_soft_dirty(ptent); |
775 | } else if (pte_file(ptent)) { | ||
776 | ptent = pte_file_clear_soft_dirty(ptent); | ||
777 | } | 760 | } |
778 | 761 | ||
779 | set_pte_at(vma->vm_mm, addr, pte, ptent); | 762 | set_pte_at(vma->vm_mm, addr, pte, ptent); |
780 | #endif | ||
781 | } | 763 | } |
782 | 764 | ||
765 | static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, | ||
766 | unsigned long addr, pmd_t *pmdp) | ||
767 | { | ||
768 | pmd_t pmd = *pmdp; | ||
769 | |||
770 | pmd = pmd_wrprotect(pmd); | ||
771 | pmd = pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY); | ||
772 | |||
773 | if (vma->vm_flags & VM_SOFTDIRTY) | ||
774 | vma->vm_flags &= ~VM_SOFTDIRTY; | ||
775 | |||
776 | set_pmd_at(vma->vm_mm, addr, pmdp, pmd); | ||
777 | } | ||
778 | |||
779 | #else | ||
780 | |||
781 | static inline void clear_soft_dirty(struct vm_area_struct *vma, | ||
782 | unsigned long addr, pte_t *pte) | ||
783 | { | ||
784 | } | ||
785 | |||
786 | static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, | ||
787 | unsigned long addr, pmd_t *pmdp) | ||
788 | { | ||
789 | } | ||
790 | #endif | ||
791 | |||
783 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | 792 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, |
784 | unsigned long end, struct mm_walk *walk) | 793 | unsigned long end, struct mm_walk *walk) |
785 | { | 794 | { |
786 | struct clear_refs_private *cp = walk->private; | 795 | struct clear_refs_private *cp = walk->private; |
787 | struct vm_area_struct *vma = cp->vma; | 796 | struct vm_area_struct *vma = walk->vma; |
788 | pte_t *pte, ptent; | 797 | pte_t *pte, ptent; |
789 | spinlock_t *ptl; | 798 | spinlock_t *ptl; |
790 | struct page *page; | 799 | struct page *page; |
791 | 800 | ||
792 | split_huge_page_pmd(vma, addr, pmd); | 801 | if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { |
802 | if (cp->type == CLEAR_REFS_SOFT_DIRTY) { | ||
803 | clear_soft_dirty_pmd(vma, addr, pmd); | ||
804 | goto out; | ||
805 | } | ||
806 | |||
807 | page = pmd_page(*pmd); | ||
808 | |||
809 | /* Clear accessed and referenced bits. */ | ||
810 | pmdp_test_and_clear_young(vma, addr, pmd); | ||
811 | ClearPageReferenced(page); | ||
812 | out: | ||
813 | spin_unlock(ptl); | ||
814 | return 0; | ||
815 | } | ||
816 | |||
793 | if (pmd_trans_unstable(pmd)) | 817 | if (pmd_trans_unstable(pmd)) |
794 | return 0; | 818 | return 0; |
795 | 819 | ||
@@ -818,6 +842,28 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
818 | return 0; | 842 | return 0; |
819 | } | 843 | } |
820 | 844 | ||
845 | static int clear_refs_test_walk(unsigned long start, unsigned long end, | ||
846 | struct mm_walk *walk) | ||
847 | { | ||
848 | struct clear_refs_private *cp = walk->private; | ||
849 | struct vm_area_struct *vma = walk->vma; | ||
850 | |||
851 | if (vma->vm_flags & VM_PFNMAP) | ||
852 | return 1; | ||
853 | |||
854 | /* | ||
855 | * Writing 1 to /proc/pid/clear_refs affects all pages. | ||
856 | * Writing 2 to /proc/pid/clear_refs only affects anonymous pages. | ||
857 | * Writing 3 to /proc/pid/clear_refs only affects file mapped pages. | ||
858 | * Writing 4 to /proc/pid/clear_refs affects all pages. | ||
859 | */ | ||
860 | if (cp->type == CLEAR_REFS_ANON && vma->vm_file) | ||
861 | return 1; | ||
862 | if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file) | ||
863 | return 1; | ||
864 | return 0; | ||
865 | } | ||
866 | |||
821 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, | 867 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, |
822 | size_t count, loff_t *ppos) | 868 | size_t count, loff_t *ppos) |
823 | { | 869 | { |
@@ -858,9 +904,22 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
858 | }; | 904 | }; |
859 | struct mm_walk clear_refs_walk = { | 905 | struct mm_walk clear_refs_walk = { |
860 | .pmd_entry = clear_refs_pte_range, | 906 | .pmd_entry = clear_refs_pte_range, |
907 | .test_walk = clear_refs_test_walk, | ||
861 | .mm = mm, | 908 | .mm = mm, |
862 | .private = &cp, | 909 | .private = &cp, |
863 | }; | 910 | }; |
911 | |||
912 | if (type == CLEAR_REFS_MM_HIWATER_RSS) { | ||
913 | /* | ||
914 | * Writing 5 to /proc/pid/clear_refs resets the peak | ||
915 | * resident set size to this mm's current rss value. | ||
916 | */ | ||
917 | down_write(&mm->mmap_sem); | ||
918 | reset_mm_hiwater_rss(mm); | ||
919 | up_write(&mm->mmap_sem); | ||
920 | goto out_mm; | ||
921 | } | ||
922 | |||
864 | down_read(&mm->mmap_sem); | 923 | down_read(&mm->mmap_sem); |
865 | if (type == CLEAR_REFS_SOFT_DIRTY) { | 924 | if (type == CLEAR_REFS_SOFT_DIRTY) { |
866 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 925 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
@@ -877,32 +936,12 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
877 | } | 936 | } |
878 | mmu_notifier_invalidate_range_start(mm, 0, -1); | 937 | mmu_notifier_invalidate_range_start(mm, 0, -1); |
879 | } | 938 | } |
880 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 939 | walk_page_range(0, ~0UL, &clear_refs_walk); |
881 | cp.vma = vma; | ||
882 | if (is_vm_hugetlb_page(vma)) | ||
883 | continue; | ||
884 | /* | ||
885 | * Writing 1 to /proc/pid/clear_refs affects all pages. | ||
886 | * | ||
887 | * Writing 2 to /proc/pid/clear_refs only affects | ||
888 | * Anonymous pages. | ||
889 | * | ||
890 | * Writing 3 to /proc/pid/clear_refs only affects file | ||
891 | * mapped pages. | ||
892 | * | ||
893 | * Writing 4 to /proc/pid/clear_refs affects all pages. | ||
894 | */ | ||
895 | if (type == CLEAR_REFS_ANON && vma->vm_file) | ||
896 | continue; | ||
897 | if (type == CLEAR_REFS_MAPPED && !vma->vm_file) | ||
898 | continue; | ||
899 | walk_page_range(vma->vm_start, vma->vm_end, | ||
900 | &clear_refs_walk); | ||
901 | } | ||
902 | if (type == CLEAR_REFS_SOFT_DIRTY) | 940 | if (type == CLEAR_REFS_SOFT_DIRTY) |
903 | mmu_notifier_invalidate_range_end(mm, 0, -1); | 941 | mmu_notifier_invalidate_range_end(mm, 0, -1); |
904 | flush_tlb_mm(mm); | 942 | flush_tlb_mm(mm); |
905 | up_read(&mm->mmap_sem); | 943 | up_read(&mm->mmap_sem); |
944 | out_mm: | ||
906 | mmput(mm); | 945 | mmput(mm); |
907 | } | 946 | } |
908 | put_task_struct(task); | 947 | put_task_struct(task); |
@@ -1066,15 +1105,13 @@ static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemap | |||
1066 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | 1105 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
1067 | struct mm_walk *walk) | 1106 | struct mm_walk *walk) |
1068 | { | 1107 | { |
1069 | struct vm_area_struct *vma; | 1108 | struct vm_area_struct *vma = walk->vma; |
1070 | struct pagemapread *pm = walk->private; | 1109 | struct pagemapread *pm = walk->private; |
1071 | spinlock_t *ptl; | 1110 | spinlock_t *ptl; |
1072 | pte_t *pte; | 1111 | pte_t *pte, *orig_pte; |
1073 | int err = 0; | 1112 | int err = 0; |
1074 | 1113 | ||
1075 | /* find the first VMA at or above 'addr' */ | 1114 | if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { |
1076 | vma = find_vma(walk->mm, addr); | ||
1077 | if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { | ||
1078 | int pmd_flags2; | 1115 | int pmd_flags2; |
1079 | 1116 | ||
1080 | if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd)) | 1117 | if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd)) |
@@ -1100,51 +1137,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
1100 | if (pmd_trans_unstable(pmd)) | 1137 | if (pmd_trans_unstable(pmd)) |
1101 | return 0; | 1138 | return 0; |
1102 | 1139 | ||
1103 | while (1) { | 1140 | /* |
1104 | /* End of address space hole, which we mark as non-present. */ | 1141 | * We can assume that @vma always points to a valid one and @end never |
1105 | unsigned long hole_end; | 1142 | * goes beyond vma->vm_end. |
1106 | 1143 | */ | |
1107 | if (vma) | 1144 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); |
1108 | hole_end = min(end, vma->vm_start); | 1145 | for (; addr < end; pte++, addr += PAGE_SIZE) { |
1109 | else | 1146 | pagemap_entry_t pme; |
1110 | hole_end = end; | ||
1111 | |||
1112 | for (; addr < hole_end; addr += PAGE_SIZE) { | ||
1113 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); | ||
1114 | |||
1115 | err = add_to_pagemap(addr, &pme, pm); | ||
1116 | if (err) | ||
1117 | return err; | ||
1118 | } | ||
1119 | |||
1120 | if (!vma || vma->vm_start >= end) | ||
1121 | break; | ||
1122 | /* | ||
1123 | * We can't possibly be in a hugetlb VMA. In general, | ||
1124 | * for a mm_walk with a pmd_entry and a hugetlb_entry, | ||
1125 | * the pmd_entry can only be called on addresses in a | ||
1126 | * hugetlb if the walk starts in a non-hugetlb VMA and | ||
1127 | * spans a hugepage VMA. Since pagemap_read walks are | ||
1128 | * PMD-sized and PMD-aligned, this will never be true. | ||
1129 | */ | ||
1130 | BUG_ON(is_vm_hugetlb_page(vma)); | ||
1131 | |||
1132 | /* Addresses in the VMA. */ | ||
1133 | for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) { | ||
1134 | pagemap_entry_t pme; | ||
1135 | pte = pte_offset_map(pmd, addr); | ||
1136 | pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); | ||
1137 | pte_unmap(pte); | ||
1138 | err = add_to_pagemap(addr, &pme, pm); | ||
1139 | if (err) | ||
1140 | return err; | ||
1141 | } | ||
1142 | 1147 | ||
1143 | if (addr == end) | 1148 | pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); |
1149 | err = add_to_pagemap(addr, &pme, pm); | ||
1150 | if (err) | ||
1144 | break; | 1151 | break; |
1145 | |||
1146 | vma = find_vma(walk->mm, addr); | ||
1147 | } | 1152 | } |
1153 | pte_unmap_unlock(orig_pte, ptl); | ||
1148 | 1154 | ||
1149 | cond_resched(); | 1155 | cond_resched(); |
1150 | 1156 | ||
@@ -1170,15 +1176,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
1170 | struct mm_walk *walk) | 1176 | struct mm_walk *walk) |
1171 | { | 1177 | { |
1172 | struct pagemapread *pm = walk->private; | 1178 | struct pagemapread *pm = walk->private; |
1173 | struct vm_area_struct *vma; | 1179 | struct vm_area_struct *vma = walk->vma; |
1174 | int err = 0; | 1180 | int err = 0; |
1175 | int flags2; | 1181 | int flags2; |
1176 | pagemap_entry_t pme; | 1182 | pagemap_entry_t pme; |
1177 | 1183 | ||
1178 | vma = find_vma(walk->mm, addr); | 1184 | if (vma->vm_flags & VM_SOFTDIRTY) |
1179 | WARN_ON_ONCE(!vma); | ||
1180 | |||
1181 | if (vma && (vma->vm_flags & VM_SOFTDIRTY)) | ||
1182 | flags2 = __PM_SOFT_DIRTY; | 1185 | flags2 = __PM_SOFT_DIRTY; |
1183 | else | 1186 | else |
1184 | flags2 = 0; | 1187 | flags2 = 0; |
@@ -1338,7 +1341,6 @@ const struct file_operations proc_pagemap_operations = { | |||
1338 | #ifdef CONFIG_NUMA | 1341 | #ifdef CONFIG_NUMA |
1339 | 1342 | ||
1340 | struct numa_maps { | 1343 | struct numa_maps { |
1341 | struct vm_area_struct *vma; | ||
1342 | unsigned long pages; | 1344 | unsigned long pages; |
1343 | unsigned long anon; | 1345 | unsigned long anon; |
1344 | unsigned long active; | 1346 | unsigned long active; |
@@ -1407,18 +1409,17 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, | |||
1407 | static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | 1409 | static int gather_pte_stats(pmd_t *pmd, unsigned long addr, |
1408 | unsigned long end, struct mm_walk *walk) | 1410 | unsigned long end, struct mm_walk *walk) |
1409 | { | 1411 | { |
1410 | struct numa_maps *md; | 1412 | struct numa_maps *md = walk->private; |
1413 | struct vm_area_struct *vma = walk->vma; | ||
1411 | spinlock_t *ptl; | 1414 | spinlock_t *ptl; |
1412 | pte_t *orig_pte; | 1415 | pte_t *orig_pte; |
1413 | pte_t *pte; | 1416 | pte_t *pte; |
1414 | 1417 | ||
1415 | md = walk->private; | 1418 | if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { |
1416 | |||
1417 | if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) { | ||
1418 | pte_t huge_pte = *(pte_t *)pmd; | 1419 | pte_t huge_pte = *(pte_t *)pmd; |
1419 | struct page *page; | 1420 | struct page *page; |
1420 | 1421 | ||
1421 | page = can_gather_numa_stats(huge_pte, md->vma, addr); | 1422 | page = can_gather_numa_stats(huge_pte, vma, addr); |
1422 | if (page) | 1423 | if (page) |
1423 | gather_stats(page, md, pte_dirty(huge_pte), | 1424 | gather_stats(page, md, pte_dirty(huge_pte), |
1424 | HPAGE_PMD_SIZE/PAGE_SIZE); | 1425 | HPAGE_PMD_SIZE/PAGE_SIZE); |
@@ -1430,7 +1431,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | |||
1430 | return 0; | 1431 | return 0; |
1431 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); | 1432 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); |
1432 | do { | 1433 | do { |
1433 | struct page *page = can_gather_numa_stats(*pte, md->vma, addr); | 1434 | struct page *page = can_gather_numa_stats(*pte, vma, addr); |
1434 | if (!page) | 1435 | if (!page) |
1435 | continue; | 1436 | continue; |
1436 | gather_stats(page, md, pte_dirty(*pte), 1); | 1437 | gather_stats(page, md, pte_dirty(*pte), 1); |
@@ -1440,7 +1441,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | |||
1440 | return 0; | 1441 | return 0; |
1441 | } | 1442 | } |
1442 | #ifdef CONFIG_HUGETLB_PAGE | 1443 | #ifdef CONFIG_HUGETLB_PAGE |
1443 | static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, | 1444 | static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, |
1444 | unsigned long addr, unsigned long end, struct mm_walk *walk) | 1445 | unsigned long addr, unsigned long end, struct mm_walk *walk) |
1445 | { | 1446 | { |
1446 | struct numa_maps *md; | 1447 | struct numa_maps *md; |
@@ -1459,7 +1460,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, | |||
1459 | } | 1460 | } |
1460 | 1461 | ||
1461 | #else | 1462 | #else |
1462 | static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, | 1463 | static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, |
1463 | unsigned long addr, unsigned long end, struct mm_walk *walk) | 1464 | unsigned long addr, unsigned long end, struct mm_walk *walk) |
1464 | { | 1465 | { |
1465 | return 0; | 1466 | return 0; |
@@ -1477,7 +1478,12 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1477 | struct numa_maps *md = &numa_priv->md; | 1478 | struct numa_maps *md = &numa_priv->md; |
1478 | struct file *file = vma->vm_file; | 1479 | struct file *file = vma->vm_file; |
1479 | struct mm_struct *mm = vma->vm_mm; | 1480 | struct mm_struct *mm = vma->vm_mm; |
1480 | struct mm_walk walk = {}; | 1481 | struct mm_walk walk = { |
1482 | .hugetlb_entry = gather_hugetlb_stats, | ||
1483 | .pmd_entry = gather_pte_stats, | ||
1484 | .private = md, | ||
1485 | .mm = mm, | ||
1486 | }; | ||
1481 | struct mempolicy *pol; | 1487 | struct mempolicy *pol; |
1482 | char buffer[64]; | 1488 | char buffer[64]; |
1483 | int nid; | 1489 | int nid; |
@@ -1488,13 +1494,6 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1488 | /* Ensure we start with an empty set of numa_maps statistics. */ | 1494 | /* Ensure we start with an empty set of numa_maps statistics. */ |
1489 | memset(md, 0, sizeof(*md)); | 1495 | memset(md, 0, sizeof(*md)); |
1490 | 1496 | ||
1491 | md->vma = vma; | ||
1492 | |||
1493 | walk.hugetlb_entry = gather_hugetbl_stats; | ||
1494 | walk.pmd_entry = gather_pte_stats; | ||
1495 | walk.private = md; | ||
1496 | walk.mm = mm; | ||
1497 | |||
1498 | pol = __get_vma_policy(vma, vma->vm_start); | 1497 | pol = __get_vma_policy(vma, vma->vm_start); |
1499 | if (pol) { | 1498 | if (pol) { |
1500 | mpol_to_str(buffer, sizeof(buffer), pol); | 1499 | mpol_to_str(buffer, sizeof(buffer), pol); |
@@ -1528,7 +1527,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1528 | if (is_vm_hugetlb_page(vma)) | 1527 | if (is_vm_hugetlb_page(vma)) |
1529 | seq_puts(m, " huge"); | 1528 | seq_puts(m, " huge"); |
1530 | 1529 | ||
1531 | walk_page_range(vma->vm_start, vma->vm_end, &walk); | 1530 | /* mmap_sem is held by m_start */ |
1531 | walk_page_vma(vma, &walk); | ||
1532 | 1532 | ||
1533 | if (!md->pages) | 1533 | if (!md->pages) |
1534 | goto out; | 1534 | goto out; |
@@ -1557,6 +1557,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1557 | for_each_node_state(nid, N_MEMORY) | 1557 | for_each_node_state(nid, N_MEMORY) |
1558 | if (md->node[nid]) | 1558 | if (md->node[nid]) |
1559 | seq_printf(m, " N%d=%lu", nid, md->node[nid]); | 1559 | seq_printf(m, " N%d=%lu", nid, md->node[nid]); |
1560 | |||
1561 | seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10); | ||
1560 | out: | 1562 | out: |
1561 | seq_putc(m, '\n'); | 1563 | seq_putc(m, '\n'); |
1562 | m_cache_vma(m, vma); | 1564 | m_cache_vma(m, vma); |
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index a90d6d354199..4e61388ec03d 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -546,8 +546,8 @@ static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr) | |||
546 | nhdr_ptr = notes_section; | 546 | nhdr_ptr = notes_section; |
547 | while (nhdr_ptr->n_namesz != 0) { | 547 | while (nhdr_ptr->n_namesz != 0) { |
548 | sz = sizeof(Elf64_Nhdr) + | 548 | sz = sizeof(Elf64_Nhdr) + |
549 | ((nhdr_ptr->n_namesz + 3) & ~3) + | 549 | (((u64)nhdr_ptr->n_namesz + 3) & ~3) + |
550 | ((nhdr_ptr->n_descsz + 3) & ~3); | 550 | (((u64)nhdr_ptr->n_descsz + 3) & ~3); |
551 | if ((real_sz + sz) > max_sz) { | 551 | if ((real_sz + sz) > max_sz) { |
552 | pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n", | 552 | pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n", |
553 | nhdr_ptr->n_namesz, nhdr_ptr->n_descsz); | 553 | nhdr_ptr->n_namesz, nhdr_ptr->n_descsz); |
@@ -732,8 +732,8 @@ static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr) | |||
732 | nhdr_ptr = notes_section; | 732 | nhdr_ptr = notes_section; |
733 | while (nhdr_ptr->n_namesz != 0) { | 733 | while (nhdr_ptr->n_namesz != 0) { |
734 | sz = sizeof(Elf32_Nhdr) + | 734 | sz = sizeof(Elf32_Nhdr) + |
735 | ((nhdr_ptr->n_namesz + 3) & ~3) + | 735 | (((u64)nhdr_ptr->n_namesz + 3) & ~3) + |
736 | ((nhdr_ptr->n_descsz + 3) & ~3); | 736 | (((u64)nhdr_ptr->n_descsz + 3) & ~3); |
737 | if ((real_sz + sz) > max_sz) { | 737 | if ((real_sz + sz) > max_sz) { |
738 | pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n", | 738 | pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n", |
739 | nhdr_ptr->n_namesz, nhdr_ptr->n_descsz); | 739 | nhdr_ptr->n_namesz, nhdr_ptr->n_descsz); |
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 0f96f71ab32b..8db932da4009 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c | |||
@@ -44,6 +44,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb) | |||
44 | { MS_SYNCHRONOUS, ",sync" }, | 44 | { MS_SYNCHRONOUS, ",sync" }, |
45 | { MS_DIRSYNC, ",dirsync" }, | 45 | { MS_DIRSYNC, ",dirsync" }, |
46 | { MS_MANDLOCK, ",mand" }, | 46 | { MS_MANDLOCK, ",mand" }, |
47 | { MS_LAZYTIME, ",lazytime" }, | ||
47 | { 0, NULL } | 48 | { 0, NULL } |
48 | }; | 49 | }; |
49 | const struct proc_fs_info *fs_infop; | 50 | const struct proc_fs_info *fs_infop; |
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 983d9510becc..916b8e23d968 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig | |||
@@ -21,6 +21,16 @@ config PSTORE_CONSOLE | |||
21 | When the option is enabled, pstore will log all kernel | 21 | When the option is enabled, pstore will log all kernel |
22 | messages, even if no oops or panic happened. | 22 | messages, even if no oops or panic happened. |
23 | 23 | ||
24 | config PSTORE_PMSG | ||
25 | bool "Log user space messages" | ||
26 | depends on PSTORE | ||
27 | help | ||
28 | When the option is enabled, pstore will export a character | ||
29 | interface /dev/pmsg0 to log user space messages. On reboot | ||
30 | data can be retrieved from /sys/fs/pstore/pmsg-ramoops-[ID]. | ||
31 | |||
32 | If unsure, say N. | ||
33 | |||
24 | config PSTORE_FTRACE | 34 | config PSTORE_FTRACE |
25 | bool "Persistent function tracer" | 35 | bool "Persistent function tracer" |
26 | depends on PSTORE | 36 | depends on PSTORE |
diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile index 4c9095c2781e..e647d8e81712 100644 --- a/fs/pstore/Makefile +++ b/fs/pstore/Makefile | |||
@@ -7,5 +7,7 @@ obj-y += pstore.o | |||
7 | pstore-objs += inode.o platform.o | 7 | pstore-objs += inode.o platform.o |
8 | obj-$(CONFIG_PSTORE_FTRACE) += ftrace.o | 8 | obj-$(CONFIG_PSTORE_FTRACE) += ftrace.o |
9 | 9 | ||
10 | obj-$(CONFIG_PSTORE_PMSG) += pmsg.o | ||
11 | |||
10 | ramoops-objs += ram.o ram_core.o | 12 | ramoops-objs += ram.o ram_core.o |
11 | obj-$(CONFIG_PSTORE_RAM) += ramoops.o | 13 | obj-$(CONFIG_PSTORE_RAM) += ramoops.o |
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 50416602774d..b32ce53d24ee 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c | |||
@@ -338,32 +338,38 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, | |||
338 | 338 | ||
339 | switch (type) { | 339 | switch (type) { |
340 | case PSTORE_TYPE_DMESG: | 340 | case PSTORE_TYPE_DMESG: |
341 | sprintf(name, "dmesg-%s-%lld%s", psname, id, | 341 | scnprintf(name, sizeof(name), "dmesg-%s-%lld%s", |
342 | compressed ? ".enc.z" : ""); | 342 | psname, id, compressed ? ".enc.z" : ""); |
343 | break; | 343 | break; |
344 | case PSTORE_TYPE_CONSOLE: | 344 | case PSTORE_TYPE_CONSOLE: |
345 | sprintf(name, "console-%s-%lld", psname, id); | 345 | scnprintf(name, sizeof(name), "console-%s-%lld", psname, id); |
346 | break; | 346 | break; |
347 | case PSTORE_TYPE_FTRACE: | 347 | case PSTORE_TYPE_FTRACE: |
348 | sprintf(name, "ftrace-%s-%lld", psname, id); | 348 | scnprintf(name, sizeof(name), "ftrace-%s-%lld", psname, id); |
349 | break; | 349 | break; |
350 | case PSTORE_TYPE_MCE: | 350 | case PSTORE_TYPE_MCE: |
351 | sprintf(name, "mce-%s-%lld", psname, id); | 351 | scnprintf(name, sizeof(name), "mce-%s-%lld", psname, id); |
352 | break; | 352 | break; |
353 | case PSTORE_TYPE_PPC_RTAS: | 353 | case PSTORE_TYPE_PPC_RTAS: |
354 | sprintf(name, "rtas-%s-%lld", psname, id); | 354 | scnprintf(name, sizeof(name), "rtas-%s-%lld", psname, id); |
355 | break; | 355 | break; |
356 | case PSTORE_TYPE_PPC_OF: | 356 | case PSTORE_TYPE_PPC_OF: |
357 | sprintf(name, "powerpc-ofw-%s-%lld", psname, id); | 357 | scnprintf(name, sizeof(name), "powerpc-ofw-%s-%lld", |
358 | psname, id); | ||
358 | break; | 359 | break; |
359 | case PSTORE_TYPE_PPC_COMMON: | 360 | case PSTORE_TYPE_PPC_COMMON: |
360 | sprintf(name, "powerpc-common-%s-%lld", psname, id); | 361 | scnprintf(name, sizeof(name), "powerpc-common-%s-%lld", |
362 | psname, id); | ||
363 | break; | ||
364 | case PSTORE_TYPE_PMSG: | ||
365 | scnprintf(name, sizeof(name), "pmsg-%s-%lld", psname, id); | ||
361 | break; | 366 | break; |
362 | case PSTORE_TYPE_UNKNOWN: | 367 | case PSTORE_TYPE_UNKNOWN: |
363 | sprintf(name, "unknown-%s-%lld", psname, id); | 368 | scnprintf(name, sizeof(name), "unknown-%s-%lld", psname, id); |
364 | break; | 369 | break; |
365 | default: | 370 | default: |
366 | sprintf(name, "type%d-%s-%lld", type, psname, id); | 371 | scnprintf(name, sizeof(name), "type%d-%s-%lld", |
372 | type, psname, id); | ||
367 | break; | 373 | break; |
368 | } | 374 | } |
369 | 375 | ||
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 3b3d305277c4..c36ba2cd0b5d 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h | |||
@@ -45,6 +45,12 @@ extern void pstore_register_ftrace(void); | |||
45 | static inline void pstore_register_ftrace(void) {} | 45 | static inline void pstore_register_ftrace(void) {} |
46 | #endif | 46 | #endif |
47 | 47 | ||
48 | #ifdef CONFIG_PSTORE_PMSG | ||
49 | extern void pstore_register_pmsg(void); | ||
50 | #else | ||
51 | static inline void pstore_register_pmsg(void) {} | ||
52 | #endif | ||
53 | |||
48 | extern struct pstore_info *psinfo; | 54 | extern struct pstore_info *psinfo; |
49 | 55 | ||
50 | extern void pstore_set_kmsg_bytes(int); | 56 | extern void pstore_set_kmsg_bytes(int); |
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 0a9b72cdfeca..c4c9a10c5760 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c | |||
@@ -301,7 +301,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, | |||
301 | 301 | ||
302 | if (big_oops_buf) { | 302 | if (big_oops_buf) { |
303 | dst = big_oops_buf; | 303 | dst = big_oops_buf; |
304 | hsize = sprintf(dst, "%s#%d Part%d\n", why, | 304 | hsize = sprintf(dst, "%s#%d Part%u\n", why, |
305 | oopscount, part); | 305 | oopscount, part); |
306 | size = big_oops_buf_sz - hsize; | 306 | size = big_oops_buf_sz - hsize; |
307 | 307 | ||
@@ -321,7 +321,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, | |||
321 | } | 321 | } |
322 | } else { | 322 | } else { |
323 | dst = psinfo->buf; | 323 | dst = psinfo->buf; |
324 | hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, | 324 | hsize = sprintf(dst, "%s#%d Part%u\n", why, oopscount, |
325 | part); | 325 | part); |
326 | size = psinfo->bufsize - hsize; | 326 | size = psinfo->bufsize - hsize; |
327 | dst += hsize; | 327 | dst += hsize; |
@@ -447,6 +447,7 @@ int pstore_register(struct pstore_info *psi) | |||
447 | if ((psi->flags & PSTORE_FLAGS_FRAGILE) == 0) { | 447 | if ((psi->flags & PSTORE_FLAGS_FRAGILE) == 0) { |
448 | pstore_register_console(); | 448 | pstore_register_console(); |
449 | pstore_register_ftrace(); | 449 | pstore_register_ftrace(); |
450 | pstore_register_pmsg(); | ||
450 | } | 451 | } |
451 | 452 | ||
452 | if (pstore_update_ms >= 0) { | 453 | if (pstore_update_ms >= 0) { |
diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c new file mode 100644 index 000000000000..feb5dd2948b4 --- /dev/null +++ b/fs/pstore/pmsg.c | |||
@@ -0,0 +1,114 @@ | |||
1 | /* | ||
2 | * Copyright 2014 Google, Inc. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | */ | ||
13 | |||
14 | #include <linux/cdev.h> | ||
15 | #include <linux/device.h> | ||
16 | #include <linux/fs.h> | ||
17 | #include <linux/uaccess.h> | ||
18 | #include <linux/vmalloc.h> | ||
19 | #include "internal.h" | ||
20 | |||
21 | static DEFINE_MUTEX(pmsg_lock); | ||
22 | #define PMSG_MAX_BOUNCE_BUFFER_SIZE (2*PAGE_SIZE) | ||
23 | |||
24 | static ssize_t write_pmsg(struct file *file, const char __user *buf, | ||
25 | size_t count, loff_t *ppos) | ||
26 | { | ||
27 | size_t i, buffer_size; | ||
28 | char *buffer; | ||
29 | |||
30 | if (!count) | ||
31 | return 0; | ||
32 | |||
33 | if (!access_ok(VERIFY_READ, buf, count)) | ||
34 | return -EFAULT; | ||
35 | |||
36 | buffer_size = count; | ||
37 | if (buffer_size > PMSG_MAX_BOUNCE_BUFFER_SIZE) | ||
38 | buffer_size = PMSG_MAX_BOUNCE_BUFFER_SIZE; | ||
39 | buffer = vmalloc(buffer_size); | ||
40 | |||
41 | mutex_lock(&pmsg_lock); | ||
42 | for (i = 0; i < count; ) { | ||
43 | size_t c = min(count - i, buffer_size); | ||
44 | u64 id; | ||
45 | long ret; | ||
46 | |||
47 | ret = __copy_from_user(buffer, buf + i, c); | ||
48 | if (unlikely(ret != 0)) { | ||
49 | mutex_unlock(&pmsg_lock); | ||
50 | vfree(buffer); | ||
51 | return -EFAULT; | ||
52 | } | ||
53 | psinfo->write_buf(PSTORE_TYPE_PMSG, 0, &id, 0, buffer, 0, c, | ||
54 | psinfo); | ||
55 | |||
56 | i += c; | ||
57 | } | ||
58 | |||
59 | mutex_unlock(&pmsg_lock); | ||
60 | vfree(buffer); | ||
61 | return count; | ||
62 | } | ||
63 | |||
64 | static const struct file_operations pmsg_fops = { | ||
65 | .owner = THIS_MODULE, | ||
66 | .llseek = noop_llseek, | ||
67 | .write = write_pmsg, | ||
68 | }; | ||
69 | |||
70 | static struct class *pmsg_class; | ||
71 | static int pmsg_major; | ||
72 | #define PMSG_NAME "pmsg" | ||
73 | #undef pr_fmt | ||
74 | #define pr_fmt(fmt) PMSG_NAME ": " fmt | ||
75 | |||
76 | static char *pmsg_devnode(struct device *dev, umode_t *mode) | ||
77 | { | ||
78 | if (mode) | ||
79 | *mode = 0220; | ||
80 | return NULL; | ||
81 | } | ||
82 | |||
83 | void pstore_register_pmsg(void) | ||
84 | { | ||
85 | struct device *pmsg_device; | ||
86 | |||
87 | pmsg_major = register_chrdev(0, PMSG_NAME, &pmsg_fops); | ||
88 | if (pmsg_major < 0) { | ||
89 | pr_err("register_chrdev failed\n"); | ||
90 | goto err; | ||
91 | } | ||
92 | |||
93 | pmsg_class = class_create(THIS_MODULE, PMSG_NAME); | ||
94 | if (IS_ERR(pmsg_class)) { | ||
95 | pr_err("device class file already in use\n"); | ||
96 | goto err_class; | ||
97 | } | ||
98 | pmsg_class->devnode = pmsg_devnode; | ||
99 | |||
100 | pmsg_device = device_create(pmsg_class, NULL, MKDEV(pmsg_major, 0), | ||
101 | NULL, "%s%d", PMSG_NAME, 0); | ||
102 | if (IS_ERR(pmsg_device)) { | ||
103 | pr_err("failed to create device\n"); | ||
104 | goto err_device; | ||
105 | } | ||
106 | return; | ||
107 | |||
108 | err_device: | ||
109 | class_destroy(pmsg_class); | ||
110 | err_class: | ||
111 | unregister_chrdev(pmsg_major, PMSG_NAME); | ||
112 | err: | ||
113 | return; | ||
114 | } | ||
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 8613e5b35c22..39d1373128e9 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c | |||
@@ -51,6 +51,10 @@ static ulong ramoops_ftrace_size = MIN_MEM_SIZE; | |||
51 | module_param_named(ftrace_size, ramoops_ftrace_size, ulong, 0400); | 51 | module_param_named(ftrace_size, ramoops_ftrace_size, ulong, 0400); |
52 | MODULE_PARM_DESC(ftrace_size, "size of ftrace log"); | 52 | MODULE_PARM_DESC(ftrace_size, "size of ftrace log"); |
53 | 53 | ||
54 | static ulong ramoops_pmsg_size = MIN_MEM_SIZE; | ||
55 | module_param_named(pmsg_size, ramoops_pmsg_size, ulong, 0400); | ||
56 | MODULE_PARM_DESC(pmsg_size, "size of user space message log"); | ||
57 | |||
54 | static ulong mem_address; | 58 | static ulong mem_address; |
55 | module_param(mem_address, ulong, 0400); | 59 | module_param(mem_address, ulong, 0400); |
56 | MODULE_PARM_DESC(mem_address, | 60 | MODULE_PARM_DESC(mem_address, |
@@ -82,12 +86,14 @@ struct ramoops_context { | |||
82 | struct persistent_ram_zone **przs; | 86 | struct persistent_ram_zone **przs; |
83 | struct persistent_ram_zone *cprz; | 87 | struct persistent_ram_zone *cprz; |
84 | struct persistent_ram_zone *fprz; | 88 | struct persistent_ram_zone *fprz; |
89 | struct persistent_ram_zone *mprz; | ||
85 | phys_addr_t phys_addr; | 90 | phys_addr_t phys_addr; |
86 | unsigned long size; | 91 | unsigned long size; |
87 | unsigned int memtype; | 92 | unsigned int memtype; |
88 | size_t record_size; | 93 | size_t record_size; |
89 | size_t console_size; | 94 | size_t console_size; |
90 | size_t ftrace_size; | 95 | size_t ftrace_size; |
96 | size_t pmsg_size; | ||
91 | int dump_oops; | 97 | int dump_oops; |
92 | struct persistent_ram_ecc_info ecc_info; | 98 | struct persistent_ram_ecc_info ecc_info; |
93 | unsigned int max_dump_cnt; | 99 | unsigned int max_dump_cnt; |
@@ -96,6 +102,7 @@ struct ramoops_context { | |||
96 | unsigned int dump_read_cnt; | 102 | unsigned int dump_read_cnt; |
97 | unsigned int console_read_cnt; | 103 | unsigned int console_read_cnt; |
98 | unsigned int ftrace_read_cnt; | 104 | unsigned int ftrace_read_cnt; |
105 | unsigned int pmsg_read_cnt; | ||
99 | struct pstore_info pstore; | 106 | struct pstore_info pstore; |
100 | }; | 107 | }; |
101 | 108 | ||
@@ -109,6 +116,7 @@ static int ramoops_pstore_open(struct pstore_info *psi) | |||
109 | cxt->dump_read_cnt = 0; | 116 | cxt->dump_read_cnt = 0; |
110 | cxt->console_read_cnt = 0; | 117 | cxt->console_read_cnt = 0; |
111 | cxt->ftrace_read_cnt = 0; | 118 | cxt->ftrace_read_cnt = 0; |
119 | cxt->pmsg_read_cnt = 0; | ||
112 | return 0; | 120 | return 0; |
113 | } | 121 | } |
114 | 122 | ||
@@ -164,6 +172,12 @@ static int ramoops_read_kmsg_hdr(char *buffer, struct timespec *time, | |||
164 | return header_length; | 172 | return header_length; |
165 | } | 173 | } |
166 | 174 | ||
175 | static bool prz_ok(struct persistent_ram_zone *prz) | ||
176 | { | ||
177 | return !!prz && !!(persistent_ram_old_size(prz) + | ||
178 | persistent_ram_ecc_string(prz, NULL, 0)); | ||
179 | } | ||
180 | |||
167 | static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, | 181 | static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, |
168 | int *count, struct timespec *time, | 182 | int *count, struct timespec *time, |
169 | char **buf, bool *compressed, | 183 | char **buf, bool *compressed, |
@@ -178,13 +192,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, | |||
178 | prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt, | 192 | prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt, |
179 | cxt->max_dump_cnt, id, type, | 193 | cxt->max_dump_cnt, id, type, |
180 | PSTORE_TYPE_DMESG, 1); | 194 | PSTORE_TYPE_DMESG, 1); |
181 | if (!prz) | 195 | if (!prz_ok(prz)) |
182 | prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt, | 196 | prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt, |
183 | 1, id, type, PSTORE_TYPE_CONSOLE, 0); | 197 | 1, id, type, PSTORE_TYPE_CONSOLE, 0); |
184 | if (!prz) | 198 | if (!prz_ok(prz)) |
185 | prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt, | 199 | prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt, |
186 | 1, id, type, PSTORE_TYPE_FTRACE, 0); | 200 | 1, id, type, PSTORE_TYPE_FTRACE, 0); |
187 | if (!prz) | 201 | if (!prz_ok(prz)) |
202 | prz = ramoops_get_next_prz(&cxt->mprz, &cxt->pmsg_read_cnt, | ||
203 | 1, id, type, PSTORE_TYPE_PMSG, 0); | ||
204 | if (!prz_ok(prz)) | ||
188 | return 0; | 205 | return 0; |
189 | 206 | ||
190 | if (!persistent_ram_old(prz)) | 207 | if (!persistent_ram_old(prz)) |
@@ -252,6 +269,11 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, | |||
252 | return -ENOMEM; | 269 | return -ENOMEM; |
253 | persistent_ram_write(cxt->fprz, buf, size); | 270 | persistent_ram_write(cxt->fprz, buf, size); |
254 | return 0; | 271 | return 0; |
272 | } else if (type == PSTORE_TYPE_PMSG) { | ||
273 | if (!cxt->mprz) | ||
274 | return -ENOMEM; | ||
275 | persistent_ram_write(cxt->mprz, buf, size); | ||
276 | return 0; | ||
255 | } | 277 | } |
256 | 278 | ||
257 | if (type != PSTORE_TYPE_DMESG) | 279 | if (type != PSTORE_TYPE_DMESG) |
@@ -309,6 +331,9 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count, | |||
309 | case PSTORE_TYPE_FTRACE: | 331 | case PSTORE_TYPE_FTRACE: |
310 | prz = cxt->fprz; | 332 | prz = cxt->fprz; |
311 | break; | 333 | break; |
334 | case PSTORE_TYPE_PMSG: | ||
335 | prz = cxt->mprz; | ||
336 | break; | ||
312 | default: | 337 | default: |
313 | return -EINVAL; | 338 | return -EINVAL; |
314 | } | 339 | } |
@@ -435,7 +460,7 @@ static int ramoops_probe(struct platform_device *pdev) | |||
435 | goto fail_out; | 460 | goto fail_out; |
436 | 461 | ||
437 | if (!pdata->mem_size || (!pdata->record_size && !pdata->console_size && | 462 | if (!pdata->mem_size || (!pdata->record_size && !pdata->console_size && |
438 | !pdata->ftrace_size)) { | 463 | !pdata->ftrace_size && !pdata->pmsg_size)) { |
439 | pr_err("The memory size and the record/console size must be " | 464 | pr_err("The memory size and the record/console size must be " |
440 | "non-zero\n"); | 465 | "non-zero\n"); |
441 | goto fail_out; | 466 | goto fail_out; |
@@ -447,6 +472,8 @@ static int ramoops_probe(struct platform_device *pdev) | |||
447 | pdata->console_size = rounddown_pow_of_two(pdata->console_size); | 472 | pdata->console_size = rounddown_pow_of_two(pdata->console_size); |
448 | if (pdata->ftrace_size && !is_power_of_2(pdata->ftrace_size)) | 473 | if (pdata->ftrace_size && !is_power_of_2(pdata->ftrace_size)) |
449 | pdata->ftrace_size = rounddown_pow_of_two(pdata->ftrace_size); | 474 | pdata->ftrace_size = rounddown_pow_of_two(pdata->ftrace_size); |
475 | if (pdata->pmsg_size && !is_power_of_2(pdata->pmsg_size)) | ||
476 | pdata->pmsg_size = rounddown_pow_of_two(pdata->pmsg_size); | ||
450 | 477 | ||
451 | cxt->size = pdata->mem_size; | 478 | cxt->size = pdata->mem_size; |
452 | cxt->phys_addr = pdata->mem_address; | 479 | cxt->phys_addr = pdata->mem_address; |
@@ -454,12 +481,14 @@ static int ramoops_probe(struct platform_device *pdev) | |||
454 | cxt->record_size = pdata->record_size; | 481 | cxt->record_size = pdata->record_size; |
455 | cxt->console_size = pdata->console_size; | 482 | cxt->console_size = pdata->console_size; |
456 | cxt->ftrace_size = pdata->ftrace_size; | 483 | cxt->ftrace_size = pdata->ftrace_size; |
484 | cxt->pmsg_size = pdata->pmsg_size; | ||
457 | cxt->dump_oops = pdata->dump_oops; | 485 | cxt->dump_oops = pdata->dump_oops; |
458 | cxt->ecc_info = pdata->ecc_info; | 486 | cxt->ecc_info = pdata->ecc_info; |
459 | 487 | ||
460 | paddr = cxt->phys_addr; | 488 | paddr = cxt->phys_addr; |
461 | 489 | ||
462 | dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size; | 490 | dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size |
491 | - cxt->pmsg_size; | ||
463 | err = ramoops_init_przs(dev, cxt, &paddr, dump_mem_sz); | 492 | err = ramoops_init_przs(dev, cxt, &paddr, dump_mem_sz); |
464 | if (err) | 493 | if (err) |
465 | goto fail_out; | 494 | goto fail_out; |
@@ -474,13 +503,9 @@ static int ramoops_probe(struct platform_device *pdev) | |||
474 | if (err) | 503 | if (err) |
475 | goto fail_init_fprz; | 504 | goto fail_init_fprz; |
476 | 505 | ||
477 | if (!cxt->przs && !cxt->cprz && !cxt->fprz) { | 506 | err = ramoops_init_prz(dev, cxt, &cxt->mprz, &paddr, cxt->pmsg_size, 0); |
478 | pr_err("memory size too small, minimum is %zu\n", | 507 | if (err) |
479 | cxt->console_size + cxt->record_size + | 508 | goto fail_init_mprz; |
480 | cxt->ftrace_size); | ||
481 | err = -EINVAL; | ||
482 | goto fail_cnt; | ||
483 | } | ||
484 | 509 | ||
485 | cxt->pstore.data = cxt; | 510 | cxt->pstore.data = cxt; |
486 | /* | 511 | /* |
@@ -525,7 +550,8 @@ fail_buf: | |||
525 | kfree(cxt->pstore.buf); | 550 | kfree(cxt->pstore.buf); |
526 | fail_clear: | 551 | fail_clear: |
527 | cxt->pstore.bufsize = 0; | 552 | cxt->pstore.bufsize = 0; |
528 | fail_cnt: | 553 | kfree(cxt->mprz); |
554 | fail_init_mprz: | ||
529 | kfree(cxt->fprz); | 555 | kfree(cxt->fprz); |
530 | fail_init_fprz: | 556 | fail_init_fprz: |
531 | kfree(cxt->cprz); | 557 | kfree(cxt->cprz); |
@@ -583,6 +609,7 @@ static void ramoops_register_dummy(void) | |||
583 | dummy_data->record_size = record_size; | 609 | dummy_data->record_size = record_size; |
584 | dummy_data->console_size = ramoops_console_size; | 610 | dummy_data->console_size = ramoops_console_size; |
585 | dummy_data->ftrace_size = ramoops_ftrace_size; | 611 | dummy_data->ftrace_size = ramoops_ftrace_size; |
612 | dummy_data->pmsg_size = ramoops_pmsg_size; | ||
586 | dummy_data->dump_oops = dump_oops; | 613 | dummy_data->dump_oops = dump_oops; |
587 | /* | 614 | /* |
588 | * For backwards compatibility ramoops.ecc=1 means 16 bytes ECC | 615 | * For backwards compatibility ramoops.ecc=1 means 16 bytes ECC |
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig index c51df1dd237e..4a09975aac90 100644 --- a/fs/quota/Kconfig +++ b/fs/quota/Kconfig | |||
@@ -5,6 +5,7 @@ | |||
5 | config QUOTA | 5 | config QUOTA |
6 | bool "Quota support" | 6 | bool "Quota support" |
7 | select QUOTACTL | 7 | select QUOTACTL |
8 | select SRCU | ||
8 | help | 9 | help |
9 | If you say Y here, you will be able to set per user limits for disk | 10 | If you say Y here, you will be able to set per user limits for disk |
10 | usage (also called disk quotas). Currently, it works for the | 11 | usage (also called disk quotas). Currently, it works for the |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 69df5b239844..0ccd4ba3a246 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -1248,7 +1248,7 @@ static int ignore_hardlimit(struct dquot *dquot) | |||
1248 | 1248 | ||
1249 | return capable(CAP_SYS_RESOURCE) && | 1249 | return capable(CAP_SYS_RESOURCE) && |
1250 | (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || | 1250 | (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || |
1251 | !(info->dqi_flags & V1_DQF_RSQUASH)); | 1251 | !(info->dqi_flags & DQF_ROOT_SQUASH)); |
1252 | } | 1252 | } |
1253 | 1253 | ||
1254 | /* needs dq_data_lock */ | 1254 | /* needs dq_data_lock */ |
@@ -2385,14 +2385,84 @@ out: | |||
2385 | } | 2385 | } |
2386 | EXPORT_SYMBOL(dquot_quota_on_mount); | 2386 | EXPORT_SYMBOL(dquot_quota_on_mount); |
2387 | 2387 | ||
2388 | static inline qsize_t qbtos(qsize_t blocks) | 2388 | static int dquot_quota_enable(struct super_block *sb, unsigned int flags) |
2389 | { | 2389 | { |
2390 | return blocks << QIF_DQBLKSIZE_BITS; | 2390 | int ret; |
2391 | int type; | ||
2392 | struct quota_info *dqopt = sb_dqopt(sb); | ||
2393 | |||
2394 | if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) | ||
2395 | return -ENOSYS; | ||
2396 | /* Accounting cannot be turned on while fs is mounted */ | ||
2397 | flags &= ~(FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT | FS_QUOTA_PDQ_ACCT); | ||
2398 | if (!flags) | ||
2399 | return -EINVAL; | ||
2400 | for (type = 0; type < MAXQUOTAS; type++) { | ||
2401 | if (!(flags & qtype_enforce_flag(type))) | ||
2402 | continue; | ||
2403 | /* Can't enforce without accounting */ | ||
2404 | if (!sb_has_quota_usage_enabled(sb, type)) | ||
2405 | return -EINVAL; | ||
2406 | ret = dquot_enable(dqopt->files[type], type, | ||
2407 | dqopt->info[type].dqi_fmt_id, | ||
2408 | DQUOT_LIMITS_ENABLED); | ||
2409 | if (ret < 0) | ||
2410 | goto out_err; | ||
2411 | } | ||
2412 | return 0; | ||
2413 | out_err: | ||
2414 | /* Backout enforcement enablement we already did */ | ||
2415 | for (type--; type >= 0; type--) { | ||
2416 | if (flags & qtype_enforce_flag(type)) | ||
2417 | dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); | ||
2418 | } | ||
2419 | /* Error code translation for better compatibility with XFS */ | ||
2420 | if (ret == -EBUSY) | ||
2421 | ret = -EEXIST; | ||
2422 | return ret; | ||
2391 | } | 2423 | } |
2392 | 2424 | ||
2393 | static inline qsize_t stoqb(qsize_t space) | 2425 | static int dquot_quota_disable(struct super_block *sb, unsigned int flags) |
2394 | { | 2426 | { |
2395 | return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS; | 2427 | int ret; |
2428 | int type; | ||
2429 | struct quota_info *dqopt = sb_dqopt(sb); | ||
2430 | |||
2431 | if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) | ||
2432 | return -ENOSYS; | ||
2433 | /* | ||
2434 | * We don't support turning off accounting via quotactl. In principle | ||
2435 | * quota infrastructure can do this but filesystems don't expect | ||
2436 | * userspace to be able to do it. | ||
2437 | */ | ||
2438 | if (flags & | ||
2439 | (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT | FS_QUOTA_PDQ_ACCT)) | ||
2440 | return -EOPNOTSUPP; | ||
2441 | |||
2442 | /* Filter out limits not enabled */ | ||
2443 | for (type = 0; type < MAXQUOTAS; type++) | ||
2444 | if (!sb_has_quota_limits_enabled(sb, type)) | ||
2445 | flags &= ~qtype_enforce_flag(type); | ||
2446 | /* Nothing left? */ | ||
2447 | if (!flags) | ||
2448 | return -EEXIST; | ||
2449 | for (type = 0; type < MAXQUOTAS; type++) { | ||
2450 | if (flags & qtype_enforce_flag(type)) { | ||
2451 | ret = dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); | ||
2452 | if (ret < 0) | ||
2453 | goto out_err; | ||
2454 | } | ||
2455 | } | ||
2456 | return 0; | ||
2457 | out_err: | ||
2458 | /* Backout enforcement disabling we already did */ | ||
2459 | for (type--; type >= 0; type--) { | ||
2460 | if (flags & qtype_enforce_flag(type)) | ||
2461 | dquot_enable(dqopt->files[type], type, | ||
2462 | dqopt->info[type].dqi_fmt_id, | ||
2463 | DQUOT_LIMITS_ENABLED); | ||
2464 | } | ||
2465 | return ret; | ||
2396 | } | 2466 | } |
2397 | 2467 | ||
2398 | /* Generic routine for getting common part of quota structure */ | 2468 | /* Generic routine for getting common part of quota structure */ |
@@ -2444,13 +2514,13 @@ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di) | |||
2444 | return -EINVAL; | 2514 | return -EINVAL; |
2445 | 2515 | ||
2446 | if (((di->d_fieldmask & QC_SPC_SOFT) && | 2516 | if (((di->d_fieldmask & QC_SPC_SOFT) && |
2447 | stoqb(di->d_spc_softlimit) > dqi->dqi_maxblimit) || | 2517 | di->d_spc_softlimit > dqi->dqi_max_spc_limit) || |
2448 | ((di->d_fieldmask & QC_SPC_HARD) && | 2518 | ((di->d_fieldmask & QC_SPC_HARD) && |
2449 | stoqb(di->d_spc_hardlimit) > dqi->dqi_maxblimit) || | 2519 | di->d_spc_hardlimit > dqi->dqi_max_spc_limit) || |
2450 | ((di->d_fieldmask & QC_INO_SOFT) && | 2520 | ((di->d_fieldmask & QC_INO_SOFT) && |
2451 | (di->d_ino_softlimit > dqi->dqi_maxilimit)) || | 2521 | (di->d_ino_softlimit > dqi->dqi_max_ino_limit)) || |
2452 | ((di->d_fieldmask & QC_INO_HARD) && | 2522 | ((di->d_fieldmask & QC_INO_HARD) && |
2453 | (di->d_ino_hardlimit > dqi->dqi_maxilimit))) | 2523 | (di->d_ino_hardlimit > dqi->dqi_max_ino_limit))) |
2454 | return -ERANGE; | 2524 | return -ERANGE; |
2455 | 2525 | ||
2456 | spin_lock(&dq_data_lock); | 2526 | spin_lock(&dq_data_lock); |
@@ -2577,6 +2647,14 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) | |||
2577 | goto out; | 2647 | goto out; |
2578 | } | 2648 | } |
2579 | mi = sb_dqopt(sb)->info + type; | 2649 | mi = sb_dqopt(sb)->info + type; |
2650 | if (ii->dqi_valid & IIF_FLAGS) { | ||
2651 | if (ii->dqi_flags & ~DQF_SETINFO_MASK || | ||
2652 | (ii->dqi_flags & DQF_ROOT_SQUASH && | ||
2653 | mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) { | ||
2654 | err = -EINVAL; | ||
2655 | goto out; | ||
2656 | } | ||
2657 | } | ||
2580 | spin_lock(&dq_data_lock); | 2658 | spin_lock(&dq_data_lock); |
2581 | if (ii->dqi_valid & IIF_BGRACE) | 2659 | if (ii->dqi_valid & IIF_BGRACE) |
2582 | mi->dqi_bgrace = ii->dqi_bgrace; | 2660 | mi->dqi_bgrace = ii->dqi_bgrace; |
@@ -2606,6 +2684,17 @@ const struct quotactl_ops dquot_quotactl_ops = { | |||
2606 | }; | 2684 | }; |
2607 | EXPORT_SYMBOL(dquot_quotactl_ops); | 2685 | EXPORT_SYMBOL(dquot_quotactl_ops); |
2608 | 2686 | ||
2687 | const struct quotactl_ops dquot_quotactl_sysfile_ops = { | ||
2688 | .quota_enable = dquot_quota_enable, | ||
2689 | .quota_disable = dquot_quota_disable, | ||
2690 | .quota_sync = dquot_quota_sync, | ||
2691 | .get_info = dquot_get_dqinfo, | ||
2692 | .set_info = dquot_set_dqinfo, | ||
2693 | .get_dqblk = dquot_get_dqblk, | ||
2694 | .set_dqblk = dquot_set_dqblk | ||
2695 | }; | ||
2696 | EXPORT_SYMBOL(dquot_quotactl_sysfile_ops); | ||
2697 | |||
2609 | static int do_proc_dqstats(struct ctl_table *table, int write, | 2698 | static int do_proc_dqstats(struct ctl_table *table, int write, |
2610 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2699 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2611 | { | 2700 | { |
diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 6f3856328eea..d14a799c7785 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c | |||
@@ -66,18 +66,40 @@ static int quota_sync_all(int type) | |||
66 | return ret; | 66 | return ret; |
67 | } | 67 | } |
68 | 68 | ||
69 | unsigned int qtype_enforce_flag(int type) | ||
70 | { | ||
71 | switch (type) { | ||
72 | case USRQUOTA: | ||
73 | return FS_QUOTA_UDQ_ENFD; | ||
74 | case GRPQUOTA: | ||
75 | return FS_QUOTA_GDQ_ENFD; | ||
76 | case PRJQUOTA: | ||
77 | return FS_QUOTA_PDQ_ENFD; | ||
78 | } | ||
79 | return 0; | ||
80 | } | ||
81 | |||
69 | static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, | 82 | static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, |
70 | struct path *path) | 83 | struct path *path) |
71 | { | 84 | { |
72 | if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_on_meta) | 85 | if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable) |
73 | return -ENOSYS; | 86 | return -ENOSYS; |
74 | if (sb->s_qcop->quota_on_meta) | 87 | if (sb->s_qcop->quota_enable) |
75 | return sb->s_qcop->quota_on_meta(sb, type, id); | 88 | return sb->s_qcop->quota_enable(sb, qtype_enforce_flag(type)); |
76 | if (IS_ERR(path)) | 89 | if (IS_ERR(path)) |
77 | return PTR_ERR(path); | 90 | return PTR_ERR(path); |
78 | return sb->s_qcop->quota_on(sb, type, id, path); | 91 | return sb->s_qcop->quota_on(sb, type, id, path); |
79 | } | 92 | } |
80 | 93 | ||
94 | static int quota_quotaoff(struct super_block *sb, int type) | ||
95 | { | ||
96 | if (!sb->s_qcop->quota_off && !sb->s_qcop->quota_disable) | ||
97 | return -ENOSYS; | ||
98 | if (sb->s_qcop->quota_disable) | ||
99 | return sb->s_qcop->quota_disable(sb, qtype_enforce_flag(type)); | ||
100 | return sb->s_qcop->quota_off(sb, type); | ||
101 | } | ||
102 | |||
81 | static int quota_getfmt(struct super_block *sb, int type, void __user *addr) | 103 | static int quota_getfmt(struct super_block *sb, int type, void __user *addr) |
82 | { | 104 | { |
83 | __u32 fmt; | 105 | __u32 fmt; |
@@ -208,15 +230,26 @@ static int quota_setquota(struct super_block *sb, int type, qid_t id, | |||
208 | return sb->s_qcop->set_dqblk(sb, qid, &fdq); | 230 | return sb->s_qcop->set_dqblk(sb, qid, &fdq); |
209 | } | 231 | } |
210 | 232 | ||
211 | static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr) | 233 | static int quota_enable(struct super_block *sb, void __user *addr) |
212 | { | 234 | { |
213 | __u32 flags; | 235 | __u32 flags; |
214 | 236 | ||
215 | if (copy_from_user(&flags, addr, sizeof(flags))) | 237 | if (copy_from_user(&flags, addr, sizeof(flags))) |
216 | return -EFAULT; | 238 | return -EFAULT; |
217 | if (!sb->s_qcop->set_xstate) | 239 | if (!sb->s_qcop->quota_enable) |
218 | return -ENOSYS; | 240 | return -ENOSYS; |
219 | return sb->s_qcop->set_xstate(sb, flags, cmd); | 241 | return sb->s_qcop->quota_enable(sb, flags); |
242 | } | ||
243 | |||
244 | static int quota_disable(struct super_block *sb, void __user *addr) | ||
245 | { | ||
246 | __u32 flags; | ||
247 | |||
248 | if (copy_from_user(&flags, addr, sizeof(flags))) | ||
249 | return -EFAULT; | ||
250 | if (!sb->s_qcop->quota_disable) | ||
251 | return -ENOSYS; | ||
252 | return sb->s_qcop->quota_disable(sb, flags); | ||
220 | } | 253 | } |
221 | 254 | ||
222 | static int quota_getxstate(struct super_block *sb, void __user *addr) | 255 | static int quota_getxstate(struct super_block *sb, void __user *addr) |
@@ -429,9 +462,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, | |||
429 | case Q_QUOTAON: | 462 | case Q_QUOTAON: |
430 | return quota_quotaon(sb, type, cmd, id, path); | 463 | return quota_quotaon(sb, type, cmd, id, path); |
431 | case Q_QUOTAOFF: | 464 | case Q_QUOTAOFF: |
432 | if (!sb->s_qcop->quota_off) | 465 | return quota_quotaoff(sb, type); |
433 | return -ENOSYS; | ||
434 | return sb->s_qcop->quota_off(sb, type); | ||
435 | case Q_GETFMT: | 466 | case Q_GETFMT: |
436 | return quota_getfmt(sb, type, addr); | 467 | return quota_getfmt(sb, type, addr); |
437 | case Q_GETINFO: | 468 | case Q_GETINFO: |
@@ -447,8 +478,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, | |||
447 | return -ENOSYS; | 478 | return -ENOSYS; |
448 | return sb->s_qcop->quota_sync(sb, type); | 479 | return sb->s_qcop->quota_sync(sb, type); |
449 | case Q_XQUOTAON: | 480 | case Q_XQUOTAON: |
481 | return quota_enable(sb, addr); | ||
450 | case Q_XQUOTAOFF: | 482 | case Q_XQUOTAOFF: |
451 | return quota_setxstate(sb, cmd, addr); | 483 | return quota_disable(sb, addr); |
452 | case Q_XQUOTARM: | 484 | case Q_XQUOTARM: |
453 | return quota_rmxquota(sb, addr); | 485 | return quota_rmxquota(sb, addr); |
454 | case Q_XGETQSTAT: | 486 | case Q_XGETQSTAT: |
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c index 469c6848b322..8fe79beced5c 100644 --- a/fs/quota/quota_v1.c +++ b/fs/quota/quota_v1.c | |||
@@ -169,8 +169,8 @@ static int v1_read_file_info(struct super_block *sb, int type) | |||
169 | } | 169 | } |
170 | ret = 0; | 170 | ret = 0; |
171 | /* limits are stored as unsigned 32-bit data */ | 171 | /* limits are stored as unsigned 32-bit data */ |
172 | dqopt->info[type].dqi_maxblimit = 0xffffffff; | 172 | dqopt->info[type].dqi_max_spc_limit = 0xffffffffULL << QUOTABLOCK_BITS; |
173 | dqopt->info[type].dqi_maxilimit = 0xffffffff; | 173 | dqopt->info[type].dqi_max_ino_limit = 0xffffffff; |
174 | dqopt->info[type].dqi_igrace = | 174 | dqopt->info[type].dqi_igrace = |
175 | dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME; | 175 | dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME; |
176 | dqopt->info[type].dqi_bgrace = | 176 | dqopt->info[type].dqi_bgrace = |
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index 02751ec695c5..9cb10d7197f7 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c | |||
@@ -117,16 +117,17 @@ static int v2_read_file_info(struct super_block *sb, int type) | |||
117 | qinfo = info->dqi_priv; | 117 | qinfo = info->dqi_priv; |
118 | if (version == 0) { | 118 | if (version == 0) { |
119 | /* limits are stored as unsigned 32-bit data */ | 119 | /* limits are stored as unsigned 32-bit data */ |
120 | info->dqi_maxblimit = 0xffffffff; | 120 | info->dqi_max_spc_limit = 0xffffffffULL << QUOTABLOCK_BITS; |
121 | info->dqi_maxilimit = 0xffffffff; | 121 | info->dqi_max_ino_limit = 0xffffffff; |
122 | } else { | 122 | } else { |
123 | /* used space is stored as unsigned 64-bit value */ | 123 | /* used space is stored as unsigned 64-bit value in bytes */ |
124 | info->dqi_maxblimit = 0xffffffffffffffffULL; /* 2^64-1 */ | 124 | info->dqi_max_spc_limit = 0xffffffffffffffffULL; /* 2^64-1 */ |
125 | info->dqi_maxilimit = 0xffffffffffffffffULL; | 125 | info->dqi_max_ino_limit = 0xffffffffffffffffULL; |
126 | } | 126 | } |
127 | info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); | 127 | info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); |
128 | info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); | 128 | info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); |
129 | info->dqi_flags = le32_to_cpu(dinfo.dqi_flags); | 129 | /* No flags currently supported */ |
130 | info->dqi_flags = 0; | ||
130 | qinfo->dqi_sb = sb; | 131 | qinfo->dqi_sb = sb; |
131 | qinfo->dqi_type = type; | 132 | qinfo->dqi_type = type; |
132 | qinfo->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); | 133 | qinfo->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); |
@@ -157,7 +158,8 @@ static int v2_write_file_info(struct super_block *sb, int type) | |||
157 | info->dqi_flags &= ~DQF_INFO_DIRTY; | 158 | info->dqi_flags &= ~DQF_INFO_DIRTY; |
158 | dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace); | 159 | dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace); |
159 | dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace); | 160 | dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace); |
160 | dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK); | 161 | /* No flags currently supported */ |
162 | dinfo.dqi_flags = cpu_to_le32(0); | ||
161 | spin_unlock(&dq_data_lock); | 163 | spin_unlock(&dq_data_lock); |
162 | dinfo.dqi_blocks = cpu_to_le32(qinfo->dqi_blocks); | 164 | dinfo.dqi_blocks = cpu_to_le32(qinfo->dqi_blocks); |
163 | dinfo.dqi_free_blk = cpu_to_le32(qinfo->dqi_free_blk); | 165 | dinfo.dqi_free_blk = cpu_to_le32(qinfo->dqi_free_blk); |
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index bbafbde3471a..f6ab41b39612 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
@@ -34,7 +34,14 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file, | |||
34 | unsigned long flags); | 34 | unsigned long flags); |
35 | static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); | 35 | static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); |
36 | 36 | ||
37 | static unsigned ramfs_mmap_capabilities(struct file *file) | ||
38 | { | ||
39 | return NOMMU_MAP_DIRECT | NOMMU_MAP_COPY | NOMMU_MAP_READ | | ||
40 | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC; | ||
41 | } | ||
42 | |||
37 | const struct file_operations ramfs_file_operations = { | 43 | const struct file_operations ramfs_file_operations = { |
44 | .mmap_capabilities = ramfs_mmap_capabilities, | ||
38 | .mmap = ramfs_nommu_mmap, | 45 | .mmap = ramfs_nommu_mmap, |
39 | .get_unmapped_area = ramfs_nommu_get_unmapped_area, | 46 | .get_unmapped_area = ramfs_nommu_get_unmapped_area, |
40 | .read = new_sync_read, | 47 | .read = new_sync_read, |
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index d365b1c4eb3c..889d558b4e05 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
@@ -50,14 +50,6 @@ static const struct address_space_operations ramfs_aops = { | |||
50 | .set_page_dirty = __set_page_dirty_no_writeback, | 50 | .set_page_dirty = __set_page_dirty_no_writeback, |
51 | }; | 51 | }; |
52 | 52 | ||
53 | static struct backing_dev_info ramfs_backing_dev_info = { | ||
54 | .name = "ramfs", | ||
55 | .ra_pages = 0, /* No readahead */ | ||
56 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | | ||
57 | BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | | ||
58 | BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, | ||
59 | }; | ||
60 | |||
61 | struct inode *ramfs_get_inode(struct super_block *sb, | 53 | struct inode *ramfs_get_inode(struct super_block *sb, |
62 | const struct inode *dir, umode_t mode, dev_t dev) | 54 | const struct inode *dir, umode_t mode, dev_t dev) |
63 | { | 55 | { |
@@ -67,7 +59,6 @@ struct inode *ramfs_get_inode(struct super_block *sb, | |||
67 | inode->i_ino = get_next_ino(); | 59 | inode->i_ino = get_next_ino(); |
68 | inode_init_owner(inode, dir, mode); | 60 | inode_init_owner(inode, dir, mode); |
69 | inode->i_mapping->a_ops = &ramfs_aops; | 61 | inode->i_mapping->a_ops = &ramfs_aops; |
70 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; | ||
71 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); | 62 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); |
72 | mapping_set_unevictable(inode->i_mapping); | 63 | mapping_set_unevictable(inode->i_mapping); |
73 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 64 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
@@ -267,19 +258,9 @@ static struct file_system_type ramfs_fs_type = { | |||
267 | int __init init_ramfs_fs(void) | 258 | int __init init_ramfs_fs(void) |
268 | { | 259 | { |
269 | static unsigned long once; | 260 | static unsigned long once; |
270 | int err; | ||
271 | 261 | ||
272 | if (test_and_set_bit(0, &once)) | 262 | if (test_and_set_bit(0, &once)) |
273 | return 0; | 263 | return 0; |
274 | 264 | return register_filesystem(&ramfs_fs_type); | |
275 | err = bdi_init(&ramfs_backing_dev_info); | ||
276 | if (err) | ||
277 | return err; | ||
278 | |||
279 | err = register_filesystem(&ramfs_fs_type); | ||
280 | if (err) | ||
281 | bdi_destroy(&ramfs_backing_dev_info); | ||
282 | |||
283 | return err; | ||
284 | } | 265 | } |
285 | fs_initcall(init_ramfs_fs); | 266 | fs_initcall(init_ramfs_fs); |
diff --git a/fs/read_write.c b/fs/read_write.c index c0805c93b6fa..8e1b68786d66 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -333,6 +333,52 @@ out_putf: | |||
333 | } | 333 | } |
334 | #endif | 334 | #endif |
335 | 335 | ||
336 | ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos) | ||
337 | { | ||
338 | struct kiocb kiocb; | ||
339 | ssize_t ret; | ||
340 | |||
341 | if (!file->f_op->read_iter) | ||
342 | return -EINVAL; | ||
343 | |||
344 | init_sync_kiocb(&kiocb, file); | ||
345 | kiocb.ki_pos = *ppos; | ||
346 | kiocb.ki_nbytes = iov_iter_count(iter); | ||
347 | |||
348 | iter->type |= READ; | ||
349 | ret = file->f_op->read_iter(&kiocb, iter); | ||
350 | if (ret == -EIOCBQUEUED) | ||
351 | ret = wait_on_sync_kiocb(&kiocb); | ||
352 | |||
353 | if (ret > 0) | ||
354 | *ppos = kiocb.ki_pos; | ||
355 | return ret; | ||
356 | } | ||
357 | EXPORT_SYMBOL(vfs_iter_read); | ||
358 | |||
359 | ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos) | ||
360 | { | ||
361 | struct kiocb kiocb; | ||
362 | ssize_t ret; | ||
363 | |||
364 | if (!file->f_op->write_iter) | ||
365 | return -EINVAL; | ||
366 | |||
367 | init_sync_kiocb(&kiocb, file); | ||
368 | kiocb.ki_pos = *ppos; | ||
369 | kiocb.ki_nbytes = iov_iter_count(iter); | ||
370 | |||
371 | iter->type |= WRITE; | ||
372 | ret = file->f_op->write_iter(&kiocb, iter); | ||
373 | if (ret == -EIOCBQUEUED) | ||
374 | ret = wait_on_sync_kiocb(&kiocb); | ||
375 | |||
376 | if (ret > 0) | ||
377 | *ppos = kiocb.ki_pos; | ||
378 | return ret; | ||
379 | } | ||
380 | EXPORT_SYMBOL(vfs_iter_write); | ||
381 | |||
336 | /* | 382 | /* |
337 | * rw_verify_area doesn't like huge counts. We limit | 383 | * rw_verify_area doesn't like huge counts. We limit |
338 | * them to something that fits in "int" so that others | 384 | * them to something that fits in "int" so that others |
@@ -358,7 +404,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t | |||
358 | return retval; | 404 | return retval; |
359 | } | 405 | } |
360 | 406 | ||
361 | if (unlikely(inode->i_flock && mandatory_lock(inode))) { | 407 | if (unlikely(inode->i_flctx && mandatory_lock(inode))) { |
362 | retval = locks_mandatory_area( | 408 | retval = locks_mandatory_area( |
363 | read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, | 409 | read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, |
364 | inode, file, pos, count); | 410 | inode, file, pos, count); |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a7eec9888f10..e72401e1f995 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -2766,7 +2766,7 @@ static int reiserfs_write_begin(struct file *file, | |||
2766 | int old_ref = 0; | 2766 | int old_ref = 0; |
2767 | 2767 | ||
2768 | inode = mapping->host; | 2768 | inode = mapping->host; |
2769 | *fsdata = 0; | 2769 | *fsdata = NULL; |
2770 | if (flags & AOP_FLAG_CONT_EXPAND && | 2770 | if (flags & AOP_FLAG_CONT_EXPAND && |
2771 | (pos & (inode->i_sb->s_blocksize - 1)) == 0) { | 2771 | (pos & (inode->i_sb->s_blocksize - 1)) == 0) { |
2772 | pos ++; | 2772 | pos ++; |
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c index ea06c7554860..7da9e2153953 100644 --- a/fs/romfs/mmap-nommu.c +++ b/fs/romfs/mmap-nommu.c | |||
@@ -70,6 +70,15 @@ static int romfs_mmap(struct file *file, struct vm_area_struct *vma) | |||
70 | return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -ENOSYS; | 70 | return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -ENOSYS; |
71 | } | 71 | } |
72 | 72 | ||
73 | static unsigned romfs_mmap_capabilities(struct file *file) | ||
74 | { | ||
75 | struct mtd_info *mtd = file_inode(file)->i_sb->s_mtd; | ||
76 | |||
77 | if (!mtd) | ||
78 | return NOMMU_MAP_COPY; | ||
79 | return mtd_mmap_capabilities(mtd); | ||
80 | } | ||
81 | |||
73 | const struct file_operations romfs_ro_fops = { | 82 | const struct file_operations romfs_ro_fops = { |
74 | .llseek = generic_file_llseek, | 83 | .llseek = generic_file_llseek, |
75 | .read = new_sync_read, | 84 | .read = new_sync_read, |
@@ -77,4 +86,5 @@ const struct file_operations romfs_ro_fops = { | |||
77 | .splice_read = generic_file_splice_read, | 86 | .splice_read = generic_file_splice_read, |
78 | .mmap = romfs_mmap, | 87 | .mmap = romfs_mmap, |
79 | .get_unmapped_area = romfs_get_unmapped_area, | 88 | .get_unmapped_area = romfs_get_unmapped_area, |
89 | .mmap_capabilities = romfs_mmap_capabilities, | ||
80 | }; | 90 | }; |
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index e98dd88197d5..268733cda397 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
@@ -355,9 +355,6 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos) | |||
355 | case ROMFH_REG: | 355 | case ROMFH_REG: |
356 | i->i_fop = &romfs_ro_fops; | 356 | i->i_fop = &romfs_ro_fops; |
357 | i->i_data.a_ops = &romfs_aops; | 357 | i->i_data.a_ops = &romfs_aops; |
358 | if (i->i_sb->s_mtd) | ||
359 | i->i_data.backing_dev_info = | ||
360 | i->i_sb->s_mtd->backing_dev_info; | ||
361 | if (nextfh & ROMFH_EXEC) | 358 | if (nextfh & ROMFH_EXEC) |
362 | mode |= S_IXUGO; | 359 | mode |= S_IXUGO; |
363 | break; | 360 | break; |
diff --git a/fs/select.c b/fs/select.c index 467bb1cb3ea5..f684c750e08a 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -971,7 +971,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, | |||
971 | if (ret == -EINTR) { | 971 | if (ret == -EINTR) { |
972 | struct restart_block *restart_block; | 972 | struct restart_block *restart_block; |
973 | 973 | ||
974 | restart_block = ¤t_thread_info()->restart_block; | 974 | restart_block = ¤t->restart_block; |
975 | restart_block->fn = do_restart_poll; | 975 | restart_block->fn = do_restart_poll; |
976 | restart_block->poll.ufds = ufds; | 976 | restart_block->poll.ufds = ufds; |
977 | restart_block->poll.nfds = nfds; | 977 | restart_block->poll.nfds = nfds; |
diff --git a/fs/seq_file.c b/fs/seq_file.c index dbf3a59c86bb..555f82155be8 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -539,38 +539,6 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc) | |||
539 | return res; | 539 | return res; |
540 | } | 540 | } |
541 | 541 | ||
542 | int seq_bitmap(struct seq_file *m, const unsigned long *bits, | ||
543 | unsigned int nr_bits) | ||
544 | { | ||
545 | if (m->count < m->size) { | ||
546 | int len = bitmap_scnprintf(m->buf + m->count, | ||
547 | m->size - m->count, bits, nr_bits); | ||
548 | if (m->count + len < m->size) { | ||
549 | m->count += len; | ||
550 | return 0; | ||
551 | } | ||
552 | } | ||
553 | seq_set_overflow(m); | ||
554 | return -1; | ||
555 | } | ||
556 | EXPORT_SYMBOL(seq_bitmap); | ||
557 | |||
558 | int seq_bitmap_list(struct seq_file *m, const unsigned long *bits, | ||
559 | unsigned int nr_bits) | ||
560 | { | ||
561 | if (m->count < m->size) { | ||
562 | int len = bitmap_scnlistprintf(m->buf + m->count, | ||
563 | m->size - m->count, bits, nr_bits); | ||
564 | if (m->count + len < m->size) { | ||
565 | m->count += len; | ||
566 | return 0; | ||
567 | } | ||
568 | } | ||
569 | seq_set_overflow(m); | ||
570 | return -1; | ||
571 | } | ||
572 | EXPORT_SYMBOL(seq_bitmap_list); | ||
573 | |||
574 | static void *single_start(struct seq_file *p, loff_t *pos) | 542 | static void *single_start(struct seq_file *p, loff_t *pos) |
575 | { | 543 | { |
576 | return NULL + (*pos == 0); | 544 | return NULL + (*pos == 0); |
diff --git a/fs/splice.c b/fs/splice.c index 75c6058eabf2..7968da96bebb 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -961,7 +961,6 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
961 | splice_from_pipe_begin(&sd); | 961 | splice_from_pipe_begin(&sd); |
962 | while (sd.total_len) { | 962 | while (sd.total_len) { |
963 | struct iov_iter from; | 963 | struct iov_iter from; |
964 | struct kiocb kiocb; | ||
965 | size_t left; | 964 | size_t left; |
966 | int n, idx; | 965 | int n, idx; |
967 | 966 | ||
@@ -1005,29 +1004,15 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
1005 | left -= this_len; | 1004 | left -= this_len; |
1006 | } | 1005 | } |
1007 | 1006 | ||
1008 | /* ... iov_iter */ | 1007 | iov_iter_bvec(&from, ITER_BVEC | WRITE, array, n, |
1009 | from.type = ITER_BVEC | WRITE; | 1008 | sd.total_len - left); |
1010 | from.bvec = array; | 1009 | ret = vfs_iter_write(out, &from, &sd.pos); |
1011 | from.nr_segs = n; | ||
1012 | from.count = sd.total_len - left; | ||
1013 | from.iov_offset = 0; | ||
1014 | |||
1015 | /* ... and iocb */ | ||
1016 | init_sync_kiocb(&kiocb, out); | ||
1017 | kiocb.ki_pos = sd.pos; | ||
1018 | kiocb.ki_nbytes = sd.total_len - left; | ||
1019 | |||
1020 | /* now, send it */ | ||
1021 | ret = out->f_op->write_iter(&kiocb, &from); | ||
1022 | if (-EIOCBQUEUED == ret) | ||
1023 | ret = wait_on_sync_kiocb(&kiocb); | ||
1024 | |||
1025 | if (ret <= 0) | 1010 | if (ret <= 0) |
1026 | break; | 1011 | break; |
1027 | 1012 | ||
1028 | sd.num_spliced += ret; | 1013 | sd.num_spliced += ret; |
1029 | sd.total_len -= ret; | 1014 | sd.total_len -= ret; |
1030 | *ppos = sd.pos = kiocb.ki_pos; | 1015 | *ppos = sd.pos; |
1031 | 1016 | ||
1032 | /* dismiss the fully eaten buffers, adjust the partial one */ | 1017 | /* dismiss the fully eaten buffers, adjust the partial one */ |
1033 | while (ret) { | 1018 | while (ret) { |
diff --git a/fs/super.c b/fs/super.c index eae088f6aaae..65a53efc1cf4 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -36,8 +36,8 @@ | |||
36 | #include "internal.h" | 36 | #include "internal.h" |
37 | 37 | ||
38 | 38 | ||
39 | LIST_HEAD(super_blocks); | 39 | static LIST_HEAD(super_blocks); |
40 | DEFINE_SPINLOCK(sb_lock); | 40 | static DEFINE_SPINLOCK(sb_lock); |
41 | 41 | ||
42 | static char *sb_writers_name[SB_FREEZE_LEVELS] = { | 42 | static char *sb_writers_name[SB_FREEZE_LEVELS] = { |
43 | "sb_writers", | 43 | "sb_writers", |
@@ -75,10 +75,10 @@ static unsigned long super_cache_scan(struct shrinker *shrink, | |||
75 | return SHRINK_STOP; | 75 | return SHRINK_STOP; |
76 | 76 | ||
77 | if (sb->s_op->nr_cached_objects) | 77 | if (sb->s_op->nr_cached_objects) |
78 | fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid); | 78 | fs_objects = sb->s_op->nr_cached_objects(sb, sc); |
79 | 79 | ||
80 | inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid); | 80 | inodes = list_lru_shrink_count(&sb->s_inode_lru, sc); |
81 | dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid); | 81 | dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc); |
82 | total_objects = dentries + inodes + fs_objects + 1; | 82 | total_objects = dentries + inodes + fs_objects + 1; |
83 | if (!total_objects) | 83 | if (!total_objects) |
84 | total_objects = 1; | 84 | total_objects = 1; |
@@ -86,19 +86,23 @@ static unsigned long super_cache_scan(struct shrinker *shrink, | |||
86 | /* proportion the scan between the caches */ | 86 | /* proportion the scan between the caches */ |
87 | dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); | 87 | dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); |
88 | inodes = mult_frac(sc->nr_to_scan, inodes, total_objects); | 88 | inodes = mult_frac(sc->nr_to_scan, inodes, total_objects); |
89 | fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects); | ||
89 | 90 | ||
90 | /* | 91 | /* |
91 | * prune the dcache first as the icache is pinned by it, then | 92 | * prune the dcache first as the icache is pinned by it, then |
92 | * prune the icache, followed by the filesystem specific caches | 93 | * prune the icache, followed by the filesystem specific caches |
94 | * | ||
95 | * Ensure that we always scan at least one object - memcg kmem | ||
96 | * accounting uses this to fully empty the caches. | ||
93 | */ | 97 | */ |
94 | freed = prune_dcache_sb(sb, dentries, sc->nid); | 98 | sc->nr_to_scan = dentries + 1; |
95 | freed += prune_icache_sb(sb, inodes, sc->nid); | 99 | freed = prune_dcache_sb(sb, sc); |
100 | sc->nr_to_scan = inodes + 1; | ||
101 | freed += prune_icache_sb(sb, sc); | ||
96 | 102 | ||
97 | if (fs_objects) { | 103 | if (fs_objects) { |
98 | fs_objects = mult_frac(sc->nr_to_scan, fs_objects, | 104 | sc->nr_to_scan = fs_objects + 1; |
99 | total_objects); | 105 | freed += sb->s_op->free_cached_objects(sb, sc); |
100 | freed += sb->s_op->free_cached_objects(sb, fs_objects, | ||
101 | sc->nid); | ||
102 | } | 106 | } |
103 | 107 | ||
104 | drop_super(sb); | 108 | drop_super(sb); |
@@ -118,17 +122,14 @@ static unsigned long super_cache_count(struct shrinker *shrink, | |||
118 | * scalability bottleneck. The counts could get updated | 122 | * scalability bottleneck. The counts could get updated |
119 | * between super_cache_count and super_cache_scan anyway. | 123 | * between super_cache_count and super_cache_scan anyway. |
120 | * Call to super_cache_count with shrinker_rwsem held | 124 | * Call to super_cache_count with shrinker_rwsem held |
121 | * ensures the safety of call to list_lru_count_node() and | 125 | * ensures the safety of call to list_lru_shrink_count() and |
122 | * s_op->nr_cached_objects(). | 126 | * s_op->nr_cached_objects(). |
123 | */ | 127 | */ |
124 | if (sb->s_op && sb->s_op->nr_cached_objects) | 128 | if (sb->s_op && sb->s_op->nr_cached_objects) |
125 | total_objects = sb->s_op->nr_cached_objects(sb, | 129 | total_objects = sb->s_op->nr_cached_objects(sb, sc); |
126 | sc->nid); | ||
127 | 130 | ||
128 | total_objects += list_lru_count_node(&sb->s_dentry_lru, | 131 | total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc); |
129 | sc->nid); | 132 | total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc); |
130 | total_objects += list_lru_count_node(&sb->s_inode_lru, | ||
131 | sc->nid); | ||
132 | 133 | ||
133 | total_objects = vfs_pressure_ratio(total_objects); | 134 | total_objects = vfs_pressure_ratio(total_objects); |
134 | return total_objects; | 135 | return total_objects; |
@@ -185,15 +186,15 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
185 | } | 186 | } |
186 | init_waitqueue_head(&s->s_writers.wait); | 187 | init_waitqueue_head(&s->s_writers.wait); |
187 | init_waitqueue_head(&s->s_writers.wait_unfrozen); | 188 | init_waitqueue_head(&s->s_writers.wait_unfrozen); |
189 | s->s_bdi = &noop_backing_dev_info; | ||
188 | s->s_flags = flags; | 190 | s->s_flags = flags; |
189 | s->s_bdi = &default_backing_dev_info; | ||
190 | INIT_HLIST_NODE(&s->s_instances); | 191 | INIT_HLIST_NODE(&s->s_instances); |
191 | INIT_HLIST_BL_HEAD(&s->s_anon); | 192 | INIT_HLIST_BL_HEAD(&s->s_anon); |
192 | INIT_LIST_HEAD(&s->s_inodes); | 193 | INIT_LIST_HEAD(&s->s_inodes); |
193 | 194 | ||
194 | if (list_lru_init(&s->s_dentry_lru)) | 195 | if (list_lru_init_memcg(&s->s_dentry_lru)) |
195 | goto fail; | 196 | goto fail; |
196 | if (list_lru_init(&s->s_inode_lru)) | 197 | if (list_lru_init_memcg(&s->s_inode_lru)) |
197 | goto fail; | 198 | goto fail; |
198 | 199 | ||
199 | init_rwsem(&s->s_umount); | 200 | init_rwsem(&s->s_umount); |
@@ -229,7 +230,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
229 | s->s_shrink.scan_objects = super_cache_scan; | 230 | s->s_shrink.scan_objects = super_cache_scan; |
230 | s->s_shrink.count_objects = super_cache_count; | 231 | s->s_shrink.count_objects = super_cache_count; |
231 | s->s_shrink.batch = 1024; | 232 | s->s_shrink.batch = 1024; |
232 | s->s_shrink.flags = SHRINKER_NUMA_AWARE; | 233 | s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE; |
233 | return s; | 234 | return s; |
234 | 235 | ||
235 | fail: | 236 | fail: |
@@ -284,6 +285,14 @@ void deactivate_locked_super(struct super_block *s) | |||
284 | unregister_shrinker(&s->s_shrink); | 285 | unregister_shrinker(&s->s_shrink); |
285 | fs->kill_sb(s); | 286 | fs->kill_sb(s); |
286 | 287 | ||
288 | /* | ||
289 | * Since list_lru_destroy() may sleep, we cannot call it from | ||
290 | * put_super(), where we hold the sb_lock. Therefore we destroy | ||
291 | * the lru lists right now. | ||
292 | */ | ||
293 | list_lru_destroy(&s->s_dentry_lru); | ||
294 | list_lru_destroy(&s->s_inode_lru); | ||
295 | |||
287 | put_filesystem(fs); | 296 | put_filesystem(fs); |
288 | put_super(s); | 297 | put_super(s); |
289 | } else { | 298 | } else { |
@@ -706,9 +715,9 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | |||
706 | remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); | 715 | remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); |
707 | 716 | ||
708 | if (remount_ro) { | 717 | if (remount_ro) { |
709 | if (sb->s_pins.first) { | 718 | if (!hlist_empty(&sb->s_pins)) { |
710 | up_write(&sb->s_umount); | 719 | up_write(&sb->s_umount); |
711 | sb_pin_kill(sb); | 720 | group_pin_kill(&sb->s_pins); |
712 | down_write(&sb->s_umount); | 721 | down_write(&sb->s_umount); |
713 | if (!sb->s_root) | 722 | if (!sb->s_root) |
714 | return 0; | 723 | return 0; |
@@ -863,10 +872,7 @@ EXPORT_SYMBOL(free_anon_bdev); | |||
863 | 872 | ||
864 | int set_anon_super(struct super_block *s, void *data) | 873 | int set_anon_super(struct super_block *s, void *data) |
865 | { | 874 | { |
866 | int error = get_anon_bdev(&s->s_dev); | 875 | return get_anon_bdev(&s->s_dev); |
867 | if (!error) | ||
868 | s->s_bdi = &noop_backing_dev_info; | ||
869 | return error; | ||
870 | } | 876 | } |
871 | 877 | ||
872 | EXPORT_SYMBOL(set_anon_super); | 878 | EXPORT_SYMBOL(set_anon_super); |
@@ -1111,7 +1117,6 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) | |||
1111 | sb = root->d_sb; | 1117 | sb = root->d_sb; |
1112 | BUG_ON(!sb); | 1118 | BUG_ON(!sb); |
1113 | WARN_ON(!sb->s_bdi); | 1119 | WARN_ON(!sb->s_bdi); |
1114 | WARN_ON(sb->s_bdi == &default_backing_dev_info); | ||
1115 | sb->s_flags |= MS_BORN; | 1120 | sb->s_flags |= MS_BORN; |
1116 | 1121 | ||
1117 | error = security_sb_kern_mount(sb, flags, secdata); | 1122 | error = security_sb_kern_mount(sb, flags, secdata); |
@@ -177,8 +177,16 @@ SYSCALL_DEFINE1(syncfs, int, fd) | |||
177 | */ | 177 | */ |
178 | int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) | 178 | int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) |
179 | { | 179 | { |
180 | struct inode *inode = file->f_mapping->host; | ||
181 | |||
180 | if (!file->f_op->fsync) | 182 | if (!file->f_op->fsync) |
181 | return -EINVAL; | 183 | return -EINVAL; |
184 | if (!datasync && (inode->i_state & I_DIRTY_TIME)) { | ||
185 | spin_lock(&inode->i_lock); | ||
186 | inode->i_state &= ~I_DIRTY_TIME; | ||
187 | spin_unlock(&inode->i_lock); | ||
188 | mark_inode_dirty_sync(inode); | ||
189 | } | ||
182 | return file->f_op->fsync(file, start, end, datasync); | 190 | return file->f_op->fsync(file, start, end, datasync); |
183 | } | 191 | } |
184 | EXPORT_SYMBOL(vfs_fsync_range); | 192 | EXPORT_SYMBOL(vfs_fsync_range); |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index dfe928a9540f..7c2867b44141 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -295,7 +295,7 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent, | |||
295 | key = attr->key ?: (struct lock_class_key *)&attr->skey; | 295 | key = attr->key ?: (struct lock_class_key *)&attr->skey; |
296 | #endif | 296 | #endif |
297 | kn = __kernfs_create_file(parent, attr->name, mode & 0777, size, ops, | 297 | kn = __kernfs_create_file(parent, attr->name, mode & 0777, size, ops, |
298 | (void *)attr, ns, true, key); | 298 | (void *)attr, ns, key); |
299 | if (IS_ERR(kn)) { | 299 | if (IS_ERR(kn)) { |
300 | if (PTR_ERR(kn) == -EEXIST) | 300 | if (PTR_ERR(kn) == -EEXIST) |
301 | sysfs_warn_dup(parent, attr->name); | 301 | sysfs_warn_dup(parent, attr->name); |
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 7d2a860ba788..2554d8835b48 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c | |||
@@ -99,7 +99,7 @@ static int internal_create_group(struct kobject *kobj, int update, | |||
99 | return -EINVAL; | 99 | return -EINVAL; |
100 | if (!grp->attrs && !grp->bin_attrs) { | 100 | if (!grp->attrs && !grp->bin_attrs) { |
101 | WARN(1, "sysfs: (bin_)attrs not set by subsystem for group: %s/%s\n", | 101 | WARN(1, "sysfs: (bin_)attrs not set by subsystem for group: %s/%s\n", |
102 | kobj->name, grp->name ? "" : grp->name); | 102 | kobj->name, grp->name ?: ""); |
103 | return -EINVAL; | 103 | return -EINVAL; |
104 | } | 104 | } |
105 | if (grp->name) { | 105 | if (grp->name) { |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 7ed13e1e216a..4cfb3e82c56f 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
@@ -2032,6 +2032,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, | |||
2032 | long long blk_offs; | 2032 | long long blk_offs; |
2033 | struct ubifs_data_node *dn = node; | 2033 | struct ubifs_data_node *dn = node; |
2034 | 2034 | ||
2035 | ubifs_assert(zbr->len >= UBIFS_DATA_NODE_SZ); | ||
2036 | |||
2035 | /* | 2037 | /* |
2036 | * Search the inode node this data node belongs to and insert | 2038 | * Search the inode node this data node belongs to and insert |
2037 | * it to the RB-tree of inodes. | 2039 | * it to the RB-tree of inodes. |
@@ -2060,6 +2062,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, | |||
2060 | struct ubifs_dent_node *dent = node; | 2062 | struct ubifs_dent_node *dent = node; |
2061 | struct fsck_inode *fscki1; | 2063 | struct fsck_inode *fscki1; |
2062 | 2064 | ||
2065 | ubifs_assert(zbr->len >= UBIFS_DENT_NODE_SZ); | ||
2066 | |||
2063 | err = ubifs_validate_entry(c, dent); | 2067 | err = ubifs_validate_entry(c, dent); |
2064 | if (err) | 2068 | if (err) |
2065 | goto out_dump; | 2069 | goto out_dump; |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index ea41649e4ca5..0fa6c803992e 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -108,8 +108,6 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, | |||
108 | inode->i_mtime = inode->i_atime = inode->i_ctime = | 108 | inode->i_mtime = inode->i_atime = inode->i_ctime = |
109 | ubifs_current_time(inode); | 109 | ubifs_current_time(inode); |
110 | inode->i_mapping->nrpages = 0; | 110 | inode->i_mapping->nrpages = 0; |
111 | /* Disable readahead */ | ||
112 | inode->i_mapping->backing_dev_info = &c->bdi; | ||
113 | 111 | ||
114 | switch (mode & S_IFMT) { | 112 | switch (mode & S_IFMT) { |
115 | case S_IFREG: | 113 | case S_IFREG: |
@@ -272,6 +270,10 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
272 | goto out_budg; | 270 | goto out_budg; |
273 | } | 271 | } |
274 | 272 | ||
273 | err = ubifs_init_security(dir, inode, &dentry->d_name); | ||
274 | if (err) | ||
275 | goto out_cancel; | ||
276 | |||
275 | mutex_lock(&dir_ui->ui_mutex); | 277 | mutex_lock(&dir_ui->ui_mutex); |
276 | dir->i_size += sz_change; | 278 | dir->i_size += sz_change; |
277 | dir_ui->ui_size = dir->i_size; | 279 | dir_ui->ui_size = dir->i_size; |
@@ -728,6 +730,10 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
728 | goto out_budg; | 730 | goto out_budg; |
729 | } | 731 | } |
730 | 732 | ||
733 | err = ubifs_init_security(dir, inode, &dentry->d_name); | ||
734 | if (err) | ||
735 | goto out_cancel; | ||
736 | |||
731 | mutex_lock(&dir_ui->ui_mutex); | 737 | mutex_lock(&dir_ui->ui_mutex); |
732 | insert_inode_hash(inode); | 738 | insert_inode_hash(inode); |
733 | inc_nlink(inode); | 739 | inc_nlink(inode); |
@@ -808,6 +814,10 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, | |||
808 | ui->data = dev; | 814 | ui->data = dev; |
809 | ui->data_len = devlen; | 815 | ui->data_len = devlen; |
810 | 816 | ||
817 | err = ubifs_init_security(dir, inode, &dentry->d_name); | ||
818 | if (err) | ||
819 | goto out_cancel; | ||
820 | |||
811 | mutex_lock(&dir_ui->ui_mutex); | 821 | mutex_lock(&dir_ui->ui_mutex); |
812 | dir->i_size += sz_change; | 822 | dir->i_size += sz_change; |
813 | dir_ui->ui_size = dir->i_size; | 823 | dir_ui->ui_size = dir->i_size; |
@@ -884,6 +894,10 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, | |||
884 | ui->data_len = len; | 894 | ui->data_len = len; |
885 | inode->i_size = ubifs_inode(inode)->ui_size = len; | 895 | inode->i_size = ubifs_inode(inode)->ui_size = len; |
886 | 896 | ||
897 | err = ubifs_init_security(dir, inode, &dentry->d_name); | ||
898 | if (err) | ||
899 | goto out_cancel; | ||
900 | |||
887 | mutex_lock(&dir_ui->ui_mutex); | 901 | mutex_lock(&dir_ui->ui_mutex); |
888 | dir->i_size += sz_change; | 902 | dir->i_size += sz_change; |
889 | dir_ui->ui_size = dir->i_size; | 903 | dir_ui->ui_size = dir->i_size; |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 538519ee37d9..e627c0acf626 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -1536,7 +1536,6 @@ static const struct vm_operations_struct ubifs_file_vm_ops = { | |||
1536 | .fault = filemap_fault, | 1536 | .fault = filemap_fault, |
1537 | .map_pages = filemap_map_pages, | 1537 | .map_pages = filemap_map_pages, |
1538 | .page_mkwrite = ubifs_vm_page_mkwrite, | 1538 | .page_mkwrite = ubifs_vm_page_mkwrite, |
1539 | .remap_pages = generic_file_remap_pages, | ||
1540 | }; | 1539 | }; |
1541 | 1540 | ||
1542 | static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) | 1541 | static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) |
@@ -1574,6 +1573,10 @@ const struct inode_operations ubifs_symlink_inode_operations = { | |||
1574 | .follow_link = ubifs_follow_link, | 1573 | .follow_link = ubifs_follow_link, |
1575 | .setattr = ubifs_setattr, | 1574 | .setattr = ubifs_setattr, |
1576 | .getattr = ubifs_getattr, | 1575 | .getattr = ubifs_getattr, |
1576 | .setxattr = ubifs_setxattr, | ||
1577 | .getxattr = ubifs_getxattr, | ||
1578 | .listxattr = ubifs_listxattr, | ||
1579 | .removexattr = ubifs_removexattr, | ||
1577 | }; | 1580 | }; |
1578 | 1581 | ||
1579 | const struct file_operations ubifs_file_operations = { | 1582 | const struct file_operations ubifs_file_operations = { |
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 3187925e9879..9b40a1c5e160 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c | |||
@@ -1028,9 +1028,22 @@ int ubifs_replay_journal(struct ubifs_info *c) | |||
1028 | 1028 | ||
1029 | do { | 1029 | do { |
1030 | err = replay_log_leb(c, lnum, 0, c->sbuf); | 1030 | err = replay_log_leb(c, lnum, 0, c->sbuf); |
1031 | if (err == 1) | 1031 | if (err == 1) { |
1032 | /* We hit the end of the log */ | 1032 | if (lnum != c->lhead_lnum) |
1033 | break; | 1033 | /* We hit the end of the log */ |
1034 | break; | ||
1035 | |||
1036 | /* | ||
1037 | * The head of the log must always start with the | ||
1038 | * "commit start" node on a properly formatted UBIFS. | ||
1039 | * But we found no nodes at all, which means that | ||
1040 | * something went wrong and we cannot proceed mounting | ||
1041 | * the file-system. | ||
1042 | */ | ||
1043 | ubifs_err("no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted", | ||
1044 | lnum, 0); | ||
1045 | err = -EINVAL; | ||
1046 | } | ||
1034 | if (err) | 1047 | if (err) |
1035 | goto out; | 1048 | goto out; |
1036 | lnum = ubifs_next_log_lnum(c, lnum); | 1049 | lnum = ubifs_next_log_lnum(c, lnum); |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 106bf20629ce..93e946561c5c 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -156,9 +156,6 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) | |||
156 | if (err) | 156 | if (err) |
157 | goto out_invalid; | 157 | goto out_invalid; |
158 | 158 | ||
159 | /* Disable read-ahead */ | ||
160 | inode->i_mapping->backing_dev_info = &c->bdi; | ||
161 | |||
162 | switch (inode->i_mode & S_IFMT) { | 159 | switch (inode->i_mode & S_IFMT) { |
163 | case S_IFREG: | 160 | case S_IFREG: |
164 | inode->i_mapping->a_ops = &ubifs_file_address_operations; | 161 | inode->i_mapping->a_ops = &ubifs_file_address_operations; |
@@ -2017,7 +2014,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) | |||
2017 | * Read-ahead will be disabled because @c->bdi.ra_pages is 0. | 2014 | * Read-ahead will be disabled because @c->bdi.ra_pages is 0. |
2018 | */ | 2015 | */ |
2019 | c->bdi.name = "ubifs", | 2016 | c->bdi.name = "ubifs", |
2020 | c->bdi.capabilities = BDI_CAP_MAP_COPY; | 2017 | c->bdi.capabilities = 0; |
2021 | err = bdi_init(&c->bdi); | 2018 | err = bdi_init(&c->bdi); |
2022 | if (err) | 2019 | if (err) |
2023 | goto out_close; | 2020 | goto out_close; |
@@ -2039,6 +2036,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) | |||
2039 | if (c->max_inode_sz > MAX_LFS_FILESIZE) | 2036 | if (c->max_inode_sz > MAX_LFS_FILESIZE) |
2040 | sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; | 2037 | sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; |
2041 | sb->s_op = &ubifs_super_operations; | 2038 | sb->s_op = &ubifs_super_operations; |
2039 | sb->s_xattr = ubifs_xattr_handlers; | ||
2042 | 2040 | ||
2043 | mutex_lock(&c->umount_mutex); | 2041 | mutex_lock(&c->umount_mutex); |
2044 | err = mount_ubifs(c); | 2042 | err = mount_ubifs(c); |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index c4fe900c67ab..bc04b9c69891 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/mtd/ubi.h> | 36 | #include <linux/mtd/ubi.h> |
37 | #include <linux/pagemap.h> | 37 | #include <linux/pagemap.h> |
38 | #include <linux/backing-dev.h> | 38 | #include <linux/backing-dev.h> |
39 | #include <linux/security.h> | ||
39 | #include "ubifs-media.h" | 40 | #include "ubifs-media.h" |
40 | 41 | ||
41 | /* Version of this UBIFS implementation */ | 42 | /* Version of this UBIFS implementation */ |
@@ -1465,6 +1466,7 @@ extern spinlock_t ubifs_infos_lock; | |||
1465 | extern atomic_long_t ubifs_clean_zn_cnt; | 1466 | extern atomic_long_t ubifs_clean_zn_cnt; |
1466 | extern struct kmem_cache *ubifs_inode_slab; | 1467 | extern struct kmem_cache *ubifs_inode_slab; |
1467 | extern const struct super_operations ubifs_super_operations; | 1468 | extern const struct super_operations ubifs_super_operations; |
1469 | extern const struct xattr_handler *ubifs_xattr_handlers[]; | ||
1468 | extern const struct address_space_operations ubifs_file_address_operations; | 1470 | extern const struct address_space_operations ubifs_file_address_operations; |
1469 | extern const struct file_operations ubifs_file_operations; | 1471 | extern const struct file_operations ubifs_file_operations; |
1470 | extern const struct inode_operations ubifs_file_inode_operations; | 1472 | extern const struct inode_operations ubifs_file_inode_operations; |
@@ -1754,6 +1756,8 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, | |||
1754 | size_t size); | 1756 | size_t size); |
1755 | ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); | 1757 | ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); |
1756 | int ubifs_removexattr(struct dentry *dentry, const char *name); | 1758 | int ubifs_removexattr(struct dentry *dentry, const char *name); |
1759 | int ubifs_init_security(struct inode *dentry, struct inode *inode, | ||
1760 | const struct qstr *qstr); | ||
1757 | 1761 | ||
1758 | /* super.c */ | 1762 | /* super.c */ |
1759 | struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); | 1763 | struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); |
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 5e0a63b1b0d5..a92be244a6fb 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c | |||
@@ -100,24 +100,30 @@ static const struct file_operations empty_fops; | |||
100 | static int create_xattr(struct ubifs_info *c, struct inode *host, | 100 | static int create_xattr(struct ubifs_info *c, struct inode *host, |
101 | const struct qstr *nm, const void *value, int size) | 101 | const struct qstr *nm, const void *value, int size) |
102 | { | 102 | { |
103 | int err; | 103 | int err, names_len; |
104 | struct inode *inode; | 104 | struct inode *inode; |
105 | struct ubifs_inode *ui, *host_ui = ubifs_inode(host); | 105 | struct ubifs_inode *ui, *host_ui = ubifs_inode(host); |
106 | struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, | 106 | struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, |
107 | .new_ino_d = ALIGN(size, 8), .dirtied_ino = 1, | 107 | .new_ino_d = ALIGN(size, 8), .dirtied_ino = 1, |
108 | .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; | 108 | .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; |
109 | 109 | ||
110 | if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) | 110 | if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) { |
111 | ubifs_err("inode %lu already has too many xattrs (%d), cannot create more", | ||
112 | host->i_ino, host_ui->xattr_cnt); | ||
111 | return -ENOSPC; | 113 | return -ENOSPC; |
114 | } | ||
112 | /* | 115 | /* |
113 | * Linux limits the maximum size of the extended attribute names list | 116 | * Linux limits the maximum size of the extended attribute names list |
114 | * to %XATTR_LIST_MAX. This means we should not allow creating more | 117 | * to %XATTR_LIST_MAX. This means we should not allow creating more |
115 | * extended attributes if the name list becomes larger. This limitation | 118 | * extended attributes if the name list becomes larger. This limitation |
116 | * is artificial for UBIFS, though. | 119 | * is artificial for UBIFS, though. |
117 | */ | 120 | */ |
118 | if (host_ui->xattr_names + host_ui->xattr_cnt + | 121 | names_len = host_ui->xattr_names + host_ui->xattr_cnt + nm->len + 1; |
119 | nm->len + 1 > XATTR_LIST_MAX) | 122 | if (names_len > XATTR_LIST_MAX) { |
123 | ubifs_err("cannot add one more xattr name to inode %lu, total names length would become %d, max. is %d", | ||
124 | host->i_ino, names_len, XATTR_LIST_MAX); | ||
120 | return -ENOSPC; | 125 | return -ENOSPC; |
126 | } | ||
121 | 127 | ||
122 | err = ubifs_budget_space(c, &req); | 128 | err = ubifs_budget_space(c, &req); |
123 | if (err) | 129 | if (err) |
@@ -293,18 +299,16 @@ static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum) | |||
293 | return ERR_PTR(-EINVAL); | 299 | return ERR_PTR(-EINVAL); |
294 | } | 300 | } |
295 | 301 | ||
296 | int ubifs_setxattr(struct dentry *dentry, const char *name, | 302 | static int setxattr(struct inode *host, const char *name, const void *value, |
297 | const void *value, size_t size, int flags) | 303 | size_t size, int flags) |
298 | { | 304 | { |
299 | struct inode *inode, *host = dentry->d_inode; | 305 | struct inode *inode; |
300 | struct ubifs_info *c = host->i_sb->s_fs_info; | 306 | struct ubifs_info *c = host->i_sb->s_fs_info; |
301 | struct qstr nm = QSTR_INIT(name, strlen(name)); | 307 | struct qstr nm = QSTR_INIT(name, strlen(name)); |
302 | struct ubifs_dent_node *xent; | 308 | struct ubifs_dent_node *xent; |
303 | union ubifs_key key; | 309 | union ubifs_key key; |
304 | int err, type; | 310 | int err, type; |
305 | 311 | ||
306 | dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", name, | ||
307 | host->i_ino, dentry, size); | ||
308 | ubifs_assert(mutex_is_locked(&host->i_mutex)); | 312 | ubifs_assert(mutex_is_locked(&host->i_mutex)); |
309 | 313 | ||
310 | if (size > UBIFS_MAX_INO_DATA) | 314 | if (size > UBIFS_MAX_INO_DATA) |
@@ -356,6 +360,15 @@ out_free: | |||
356 | return err; | 360 | return err; |
357 | } | 361 | } |
358 | 362 | ||
363 | int ubifs_setxattr(struct dentry *dentry, const char *name, | ||
364 | const void *value, size_t size, int flags) | ||
365 | { | ||
366 | dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", | ||
367 | name, dentry->d_inode->i_ino, dentry, size); | ||
368 | |||
369 | return setxattr(dentry->d_inode, name, value, size, flags); | ||
370 | } | ||
371 | |||
359 | ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, | 372 | ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, |
360 | size_t size) | 373 | size_t size) |
361 | { | 374 | { |
@@ -568,3 +581,84 @@ out_free: | |||
568 | kfree(xent); | 581 | kfree(xent); |
569 | return err; | 582 | return err; |
570 | } | 583 | } |
584 | |||
585 | static size_t security_listxattr(struct dentry *d, char *list, size_t list_size, | ||
586 | const char *name, size_t name_len, int flags) | ||
587 | { | ||
588 | const int prefix_len = XATTR_SECURITY_PREFIX_LEN; | ||
589 | const size_t total_len = prefix_len + name_len + 1; | ||
590 | |||
591 | if (list && total_len <= list_size) { | ||
592 | memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); | ||
593 | memcpy(list + prefix_len, name, name_len); | ||
594 | list[prefix_len + name_len] = '\0'; | ||
595 | } | ||
596 | |||
597 | return total_len; | ||
598 | } | ||
599 | |||
600 | static int security_getxattr(struct dentry *d, const char *name, void *buffer, | ||
601 | size_t size, int flags) | ||
602 | { | ||
603 | return ubifs_getxattr(d, name, buffer, size); | ||
604 | } | ||
605 | |||
606 | static int security_setxattr(struct dentry *d, const char *name, | ||
607 | const void *value, size_t size, int flags, | ||
608 | int handler_flags) | ||
609 | { | ||
610 | return ubifs_setxattr(d, name, value, size, flags); | ||
611 | } | ||
612 | |||
613 | static const struct xattr_handler ubifs_xattr_security_handler = { | ||
614 | .prefix = XATTR_SECURITY_PREFIX, | ||
615 | .list = security_listxattr, | ||
616 | .get = security_getxattr, | ||
617 | .set = security_setxattr, | ||
618 | }; | ||
619 | |||
620 | const struct xattr_handler *ubifs_xattr_handlers[] = { | ||
621 | &ubifs_xattr_security_handler, | ||
622 | NULL, | ||
623 | }; | ||
624 | |||
625 | static int init_xattrs(struct inode *inode, const struct xattr *xattr_array, | ||
626 | void *fs_info) | ||
627 | { | ||
628 | const struct xattr *xattr; | ||
629 | char *name; | ||
630 | int err = 0; | ||
631 | |||
632 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { | ||
633 | name = kmalloc(XATTR_SECURITY_PREFIX_LEN + | ||
634 | strlen(xattr->name) + 1, GFP_NOFS); | ||
635 | if (!name) { | ||
636 | err = -ENOMEM; | ||
637 | break; | ||
638 | } | ||
639 | strcpy(name, XATTR_SECURITY_PREFIX); | ||
640 | strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); | ||
641 | err = setxattr(inode, name, xattr->value, xattr->value_len, 0); | ||
642 | kfree(name); | ||
643 | if (err < 0) | ||
644 | break; | ||
645 | } | ||
646 | |||
647 | return err; | ||
648 | } | ||
649 | |||
650 | int ubifs_init_security(struct inode *dentry, struct inode *inode, | ||
651 | const struct qstr *qstr) | ||
652 | { | ||
653 | int err; | ||
654 | |||
655 | mutex_lock(&inode->i_mutex); | ||
656 | err = security_inode_init_security(inode, dentry, qstr, | ||
657 | &init_xattrs, 0); | ||
658 | mutex_unlock(&inode->i_mutex); | ||
659 | |||
660 | if (err) | ||
661 | ubifs_err("cannot initialize security for inode %lu, error %d", | ||
662 | inode->i_ino, err); | ||
663 | return err; | ||
664 | } | ||
diff --git a/fs/udf/Kconfig b/fs/udf/Kconfig index 0e0e99bd6bce..c6e17a744c3b 100644 --- a/fs/udf/Kconfig +++ b/fs/udf/Kconfig | |||
@@ -2,10 +2,12 @@ config UDF_FS | |||
2 | tristate "UDF file system support" | 2 | tristate "UDF file system support" |
3 | select CRC_ITU_T | 3 | select CRC_ITU_T |
4 | help | 4 | help |
5 | This is the new file system used on some CD-ROMs and DVDs. Say Y if | 5 | This is a file system used on some CD-ROMs and DVDs. Since the |
6 | you intend to mount DVD discs or CDRW's written in packet mode, or | 6 | file system is supported by multiple operating systems and is more |
7 | if written to by other UDF utilities, such as DirectCD. | 7 | compatible with standard unix file systems, it is also suitable for |
8 | Please read <file:Documentation/filesystems/udf.txt>. | 8 | removable USB disks. Say Y if you intend to mount DVD discs or CDRW's |
9 | written in packet mode, or if you want to use UDF for removable USB | ||
10 | disks. Please read <file:Documentation/filesystems/udf.txt>. | ||
9 | 11 | ||
10 | To compile this file system support as a module, choose M here: the | 12 | To compile this file system support as a module, choose M here: the |
11 | module will be called udf. | 13 | module will be called udf. |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 5bc71d9a674a..a445d599098d 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
@@ -750,7 +750,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, | |||
750 | /* Are we beyond EOF? */ | 750 | /* Are we beyond EOF? */ |
751 | if (etype == -1) { | 751 | if (etype == -1) { |
752 | int ret; | 752 | int ret; |
753 | isBeyondEOF = 1; | 753 | isBeyondEOF = true; |
754 | if (count) { | 754 | if (count) { |
755 | if (c) | 755 | if (c) |
756 | laarr[0] = laarr[1]; | 756 | laarr[0] = laarr[1]; |
@@ -792,7 +792,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, | |||
792 | endnum = c + 1; | 792 | endnum = c + 1; |
793 | lastblock = 1; | 793 | lastblock = 1; |
794 | } else { | 794 | } else { |
795 | isBeyondEOF = 0; | 795 | isBeyondEOF = false; |
796 | endnum = startnum = ((count > 2) ? 2 : count); | 796 | endnum = startnum = ((count > 2) ? 2 : count); |
797 | 797 | ||
798 | /* if the current extent is in position 0, | 798 | /* if the current extent is in position 0, |
@@ -1288,6 +1288,7 @@ static int udf_read_inode(struct inode *inode, bool hidden_inode) | |||
1288 | struct kernel_lb_addr *iloc = &iinfo->i_location; | 1288 | struct kernel_lb_addr *iloc = &iinfo->i_location; |
1289 | unsigned int link_count; | 1289 | unsigned int link_count; |
1290 | unsigned int indirections = 0; | 1290 | unsigned int indirections = 0; |
1291 | int bs = inode->i_sb->s_blocksize; | ||
1291 | int ret = -EIO; | 1292 | int ret = -EIO; |
1292 | 1293 | ||
1293 | reread: | 1294 | reread: |
@@ -1374,38 +1375,35 @@ reread: | |||
1374 | if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_EFE)) { | 1375 | if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_EFE)) { |
1375 | iinfo->i_efe = 1; | 1376 | iinfo->i_efe = 1; |
1376 | iinfo->i_use = 0; | 1377 | iinfo->i_use = 0; |
1377 | ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - | 1378 | ret = udf_alloc_i_data(inode, bs - |
1378 | sizeof(struct extendedFileEntry)); | 1379 | sizeof(struct extendedFileEntry)); |
1379 | if (ret) | 1380 | if (ret) |
1380 | goto out; | 1381 | goto out; |
1381 | memcpy(iinfo->i_ext.i_data, | 1382 | memcpy(iinfo->i_ext.i_data, |
1382 | bh->b_data + sizeof(struct extendedFileEntry), | 1383 | bh->b_data + sizeof(struct extendedFileEntry), |
1383 | inode->i_sb->s_blocksize - | 1384 | bs - sizeof(struct extendedFileEntry)); |
1384 | sizeof(struct extendedFileEntry)); | ||
1385 | } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) { | 1385 | } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) { |
1386 | iinfo->i_efe = 0; | 1386 | iinfo->i_efe = 0; |
1387 | iinfo->i_use = 0; | 1387 | iinfo->i_use = 0; |
1388 | ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - | 1388 | ret = udf_alloc_i_data(inode, bs - sizeof(struct fileEntry)); |
1389 | sizeof(struct fileEntry)); | ||
1390 | if (ret) | 1389 | if (ret) |
1391 | goto out; | 1390 | goto out; |
1392 | memcpy(iinfo->i_ext.i_data, | 1391 | memcpy(iinfo->i_ext.i_data, |
1393 | bh->b_data + sizeof(struct fileEntry), | 1392 | bh->b_data + sizeof(struct fileEntry), |
1394 | inode->i_sb->s_blocksize - sizeof(struct fileEntry)); | 1393 | bs - sizeof(struct fileEntry)); |
1395 | } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_USE)) { | 1394 | } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_USE)) { |
1396 | iinfo->i_efe = 0; | 1395 | iinfo->i_efe = 0; |
1397 | iinfo->i_use = 1; | 1396 | iinfo->i_use = 1; |
1398 | iinfo->i_lenAlloc = le32_to_cpu( | 1397 | iinfo->i_lenAlloc = le32_to_cpu( |
1399 | ((struct unallocSpaceEntry *)bh->b_data)-> | 1398 | ((struct unallocSpaceEntry *)bh->b_data)-> |
1400 | lengthAllocDescs); | 1399 | lengthAllocDescs); |
1401 | ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - | 1400 | ret = udf_alloc_i_data(inode, bs - |
1402 | sizeof(struct unallocSpaceEntry)); | 1401 | sizeof(struct unallocSpaceEntry)); |
1403 | if (ret) | 1402 | if (ret) |
1404 | goto out; | 1403 | goto out; |
1405 | memcpy(iinfo->i_ext.i_data, | 1404 | memcpy(iinfo->i_ext.i_data, |
1406 | bh->b_data + sizeof(struct unallocSpaceEntry), | 1405 | bh->b_data + sizeof(struct unallocSpaceEntry), |
1407 | inode->i_sb->s_blocksize - | 1406 | bs - sizeof(struct unallocSpaceEntry)); |
1408 | sizeof(struct unallocSpaceEntry)); | ||
1409 | return 0; | 1407 | return 0; |
1410 | } | 1408 | } |
1411 | 1409 | ||
@@ -1489,6 +1487,15 @@ reread: | |||
1489 | } | 1487 | } |
1490 | inode->i_generation = iinfo->i_unique; | 1488 | inode->i_generation = iinfo->i_unique; |
1491 | 1489 | ||
1490 | /* | ||
1491 | * Sanity check length of allocation descriptors and extended attrs to | ||
1492 | * avoid integer overflows | ||
1493 | */ | ||
1494 | if (iinfo->i_lenEAttr > bs || iinfo->i_lenAlloc > bs) | ||
1495 | goto out; | ||
1496 | /* Now do exact checks */ | ||
1497 | if (udf_file_entry_alloc_offset(inode) + iinfo->i_lenAlloc > bs) | ||
1498 | goto out; | ||
1492 | /* Sanity checks for files in ICB so that we don't get confused later */ | 1499 | /* Sanity checks for files in ICB so that we don't get confused later */ |
1493 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { | 1500 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { |
1494 | /* | 1501 | /* |
@@ -1498,8 +1505,7 @@ reread: | |||
1498 | if (iinfo->i_lenAlloc != inode->i_size) | 1505 | if (iinfo->i_lenAlloc != inode->i_size) |
1499 | goto out; | 1506 | goto out; |
1500 | /* File in ICB has to fit in there... */ | 1507 | /* File in ICB has to fit in there... */ |
1501 | if (inode->i_size > inode->i_sb->s_blocksize - | 1508 | if (inode->i_size > bs - udf_file_entry_alloc_offset(inode)) |
1502 | udf_file_entry_alloc_offset(inode)) | ||
1503 | goto out; | 1509 | goto out; |
1504 | } | 1510 | } |
1505 | 1511 | ||
diff --git a/fs/udf/super.c b/fs/udf/super.c index 3ccb2f11fc76..f169411c4ea0 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -1599,7 +1599,7 @@ static noinline int udf_process_sequence( | |||
1599 | struct udf_vds_record *curr; | 1599 | struct udf_vds_record *curr; |
1600 | struct generic_desc *gd; | 1600 | struct generic_desc *gd; |
1601 | struct volDescPtr *vdp; | 1601 | struct volDescPtr *vdp; |
1602 | int done = 0; | 1602 | bool done = false; |
1603 | uint32_t vdsn; | 1603 | uint32_t vdsn; |
1604 | uint16_t ident; | 1604 | uint16_t ident; |
1605 | long next_s = 0, next_e = 0; | 1605 | long next_s = 0, next_e = 0; |
@@ -1680,7 +1680,7 @@ static noinline int udf_process_sequence( | |||
1680 | lastblock = next_e; | 1680 | lastblock = next_e; |
1681 | next_s = next_e = 0; | 1681 | next_s = next_e = 0; |
1682 | } else | 1682 | } else |
1683 | done = 1; | 1683 | done = true; |
1684 | break; | 1684 | break; |
1685 | } | 1685 | } |
1686 | brelse(bh); | 1686 | brelse(bh); |
@@ -2300,6 +2300,7 @@ static void udf_put_super(struct super_block *sb) | |||
2300 | udf_close_lvid(sb); | 2300 | udf_close_lvid(sb); |
2301 | brelse(sbi->s_lvid_bh); | 2301 | brelse(sbi->s_lvid_bh); |
2302 | udf_sb_free_partitions(sb); | 2302 | udf_sb_free_partitions(sb); |
2303 | mutex_destroy(&sbi->s_alloc_mutex); | ||
2303 | kfree(sb->s_fs_info); | 2304 | kfree(sb->s_fs_info); |
2304 | sb->s_fs_info = NULL; | 2305 | sb->s_fs_info = NULL; |
2305 | } | 2306 | } |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index da73801301d5..8092d3759a5e 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -95,22 +95,18 @@ | |||
95 | 95 | ||
96 | void lock_ufs(struct super_block *sb) | 96 | void lock_ufs(struct super_block *sb) |
97 | { | 97 | { |
98 | #if defined(CONFIG_SMP) || defined (CONFIG_PREEMPT) | ||
99 | struct ufs_sb_info *sbi = UFS_SB(sb); | 98 | struct ufs_sb_info *sbi = UFS_SB(sb); |
100 | 99 | ||
101 | mutex_lock(&sbi->mutex); | 100 | mutex_lock(&sbi->mutex); |
102 | sbi->mutex_owner = current; | 101 | sbi->mutex_owner = current; |
103 | #endif | ||
104 | } | 102 | } |
105 | 103 | ||
106 | void unlock_ufs(struct super_block *sb) | 104 | void unlock_ufs(struct super_block *sb) |
107 | { | 105 | { |
108 | #if defined(CONFIG_SMP) || defined (CONFIG_PREEMPT) | ||
109 | struct ufs_sb_info *sbi = UFS_SB(sb); | 106 | struct ufs_sb_info *sbi = UFS_SB(sb); |
110 | 107 | ||
111 | sbi->mutex_owner = NULL; | 108 | sbi->mutex_owner = NULL; |
112 | mutex_unlock(&sbi->mutex); | 109 | mutex_unlock(&sbi->mutex); |
113 | #endif | ||
114 | } | 110 | } |
115 | 111 | ||
116 | static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) | 112 | static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) |
@@ -1415,9 +1411,11 @@ static struct kmem_cache * ufs_inode_cachep; | |||
1415 | static struct inode *ufs_alloc_inode(struct super_block *sb) | 1411 | static struct inode *ufs_alloc_inode(struct super_block *sb) |
1416 | { | 1412 | { |
1417 | struct ufs_inode_info *ei; | 1413 | struct ufs_inode_info *ei; |
1418 | ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_NOFS); | 1414 | |
1415 | ei = kmem_cache_alloc(ufs_inode_cachep, GFP_NOFS); | ||
1419 | if (!ei) | 1416 | if (!ei) |
1420 | return NULL; | 1417 | return NULL; |
1418 | |||
1421 | ei->vfs_inode.i_version = 1; | 1419 | ei->vfs_inode.i_version = 1; |
1422 | return &ei->vfs_inode; | 1420 | return &ei->vfs_inode; |
1423 | } | 1421 | } |
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 53e95b2a1369..a7a3a63bb360 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c | |||
@@ -91,16 +91,6 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags) | |||
91 | return ptr; | 91 | return ptr; |
92 | } | 92 | } |
93 | 93 | ||
94 | void | ||
95 | kmem_free(const void *ptr) | ||
96 | { | ||
97 | if (!is_vmalloc_addr(ptr)) { | ||
98 | kfree(ptr); | ||
99 | } else { | ||
100 | vfree(ptr); | ||
101 | } | ||
102 | } | ||
103 | |||
104 | void * | 94 | void * |
105 | kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, | 95 | kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, |
106 | xfs_km_flags_t flags) | 96 | xfs_km_flags_t flags) |
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 64db0e53edea..cc6b768fc068 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h | |||
@@ -63,7 +63,10 @@ kmem_flags_convert(xfs_km_flags_t flags) | |||
63 | extern void *kmem_alloc(size_t, xfs_km_flags_t); | 63 | extern void *kmem_alloc(size_t, xfs_km_flags_t); |
64 | extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t); | 64 | extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t); |
65 | extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); | 65 | extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); |
66 | extern void kmem_free(const void *); | 66 | static inline void kmem_free(const void *ptr) |
67 | { | ||
68 | kvfree(ptr); | ||
69 | } | ||
67 | 70 | ||
68 | 71 | ||
69 | extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); | 72 | extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); |
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 5d38e8b8a913..15105dbc9e28 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c | |||
@@ -403,7 +403,7 @@ xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp) | |||
403 | if (!xfs_sb_version_hasattr2(&mp->m_sb)) { | 403 | if (!xfs_sb_version_hasattr2(&mp->m_sb)) { |
404 | xfs_sb_version_addattr2(&mp->m_sb); | 404 | xfs_sb_version_addattr2(&mp->m_sb); |
405 | spin_unlock(&mp->m_sb_lock); | 405 | spin_unlock(&mp->m_sb_lock); |
406 | xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); | 406 | xfs_log_sb(tp); |
407 | } else | 407 | } else |
408 | spin_unlock(&mp->m_sb_lock); | 408 | spin_unlock(&mp->m_sb_lock); |
409 | } | 409 | } |
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index b5eb4743f75a..61ec015dca16 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c | |||
@@ -973,7 +973,11 @@ xfs_bmap_local_to_extents( | |||
973 | *firstblock = args.fsbno; | 973 | *firstblock = args.fsbno; |
974 | bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); | 974 | bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); |
975 | 975 | ||
976 | /* initialise the block and copy the data */ | 976 | /* |
977 | * Initialise the block and copy the data | ||
978 | * | ||
979 | * Note: init_fn must set the buffer log item type correctly! | ||
980 | */ | ||
977 | init_fn(tp, bp, ip, ifp); | 981 | init_fn(tp, bp, ip, ifp); |
978 | 982 | ||
979 | /* account for the change in fork size and log everything */ | 983 | /* account for the change in fork size and log everything */ |
@@ -1221,22 +1225,20 @@ xfs_bmap_add_attrfork( | |||
1221 | goto bmap_cancel; | 1225 | goto bmap_cancel; |
1222 | if (!xfs_sb_version_hasattr(&mp->m_sb) || | 1226 | if (!xfs_sb_version_hasattr(&mp->m_sb) || |
1223 | (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { | 1227 | (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { |
1224 | __int64_t sbfields = 0; | 1228 | bool log_sb = false; |
1225 | 1229 | ||
1226 | spin_lock(&mp->m_sb_lock); | 1230 | spin_lock(&mp->m_sb_lock); |
1227 | if (!xfs_sb_version_hasattr(&mp->m_sb)) { | 1231 | if (!xfs_sb_version_hasattr(&mp->m_sb)) { |
1228 | xfs_sb_version_addattr(&mp->m_sb); | 1232 | xfs_sb_version_addattr(&mp->m_sb); |
1229 | sbfields |= XFS_SB_VERSIONNUM; | 1233 | log_sb = true; |
1230 | } | 1234 | } |
1231 | if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) { | 1235 | if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) { |
1232 | xfs_sb_version_addattr2(&mp->m_sb); | 1236 | xfs_sb_version_addattr2(&mp->m_sb); |
1233 | sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); | 1237 | log_sb = true; |
1234 | } | 1238 | } |
1235 | if (sbfields) { | 1239 | spin_unlock(&mp->m_sb_lock); |
1236 | spin_unlock(&mp->m_sb_lock); | 1240 | if (log_sb) |
1237 | xfs_mod_sb(tp, sbfields); | 1241 | xfs_log_sb(tp); |
1238 | } else | ||
1239 | spin_unlock(&mp->m_sb_lock); | ||
1240 | } | 1242 | } |
1241 | 1243 | ||
1242 | error = xfs_bmap_finish(&tp, &flist, &committed); | 1244 | error = xfs_bmap_finish(&tp, &flist, &committed); |
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 44db6db86402..b9d8a499d2c4 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h | |||
@@ -28,6 +28,37 @@ struct xfs_trans; | |||
28 | extern kmem_zone_t *xfs_bmap_free_item_zone; | 28 | extern kmem_zone_t *xfs_bmap_free_item_zone; |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * Argument structure for xfs_bmap_alloc. | ||
32 | */ | ||
33 | struct xfs_bmalloca { | ||
34 | xfs_fsblock_t *firstblock; /* i/o first block allocated */ | ||
35 | struct xfs_bmap_free *flist; /* bmap freelist */ | ||
36 | struct xfs_trans *tp; /* transaction pointer */ | ||
37 | struct xfs_inode *ip; /* incore inode pointer */ | ||
38 | struct xfs_bmbt_irec prev; /* extent before the new one */ | ||
39 | struct xfs_bmbt_irec got; /* extent after, or delayed */ | ||
40 | |||
41 | xfs_fileoff_t offset; /* offset in file filling in */ | ||
42 | xfs_extlen_t length; /* i/o length asked/allocated */ | ||
43 | xfs_fsblock_t blkno; /* starting block of new extent */ | ||
44 | |||
45 | struct xfs_btree_cur *cur; /* btree cursor */ | ||
46 | xfs_extnum_t idx; /* current extent index */ | ||
47 | int nallocs;/* number of extents alloc'd */ | ||
48 | int logflags;/* flags for transaction logging */ | ||
49 | |||
50 | xfs_extlen_t total; /* total blocks needed for xaction */ | ||
51 | xfs_extlen_t minlen; /* minimum allocation size (blocks) */ | ||
52 | xfs_extlen_t minleft; /* amount must be left after alloc */ | ||
53 | bool eof; /* set if allocating past last extent */ | ||
54 | bool wasdel; /* replacing a delayed allocation */ | ||
55 | bool userdata;/* set if is user data */ | ||
56 | bool aeof; /* allocated space at eof */ | ||
57 | bool conv; /* overwriting unwritten extents */ | ||
58 | int flags; | ||
59 | }; | ||
60 | |||
61 | /* | ||
31 | * List of extents to be free "later". | 62 | * List of extents to be free "later". |
32 | * The list is kept sorted on xbf_startblock. | 63 | * The list is kept sorted on xbf_startblock. |
33 | */ | 64 | */ |
@@ -149,6 +180,8 @@ void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); | |||
149 | void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len, | 180 | void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len, |
150 | struct xfs_bmap_free *flist, struct xfs_mount *mp); | 181 | struct xfs_bmap_free *flist, struct xfs_mount *mp); |
151 | void xfs_bmap_cancel(struct xfs_bmap_free *flist); | 182 | void xfs_bmap_cancel(struct xfs_bmap_free *flist); |
183 | int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, | ||
184 | int *committed); | ||
152 | void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); | 185 | void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); |
153 | int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, | 186 | int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, |
154 | xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); | 187 | xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); |
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index fbd6da263571..8eb718979383 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h | |||
@@ -151,10 +151,13 @@ typedef struct xfs_sb { | |||
151 | __uint32_t sb_features2; /* additional feature bits */ | 151 | __uint32_t sb_features2; /* additional feature bits */ |
152 | 152 | ||
153 | /* | 153 | /* |
154 | * bad features2 field as a result of failing to pad the sb | 154 | * bad features2 field as a result of failing to pad the sb structure to |
155 | * structure to 64 bits. Some machines will be using this field | 155 | * 64 bits. Some machines will be using this field for features2 bits. |
156 | * for features2 bits. Easiest just to mark it bad and not use | 156 | * Easiest just to mark it bad and not use it for anything else. |
157 | * it for anything else. | 157 | * |
158 | * This is not kept up to date in memory; it is always overwritten by | ||
159 | * the value in sb_features2 when formatting the incore superblock to | ||
160 | * the disk buffer. | ||
158 | */ | 161 | */ |
159 | __uint32_t sb_bad_features2; | 162 | __uint32_t sb_bad_features2; |
160 | 163 | ||
@@ -304,8 +307,8 @@ typedef enum { | |||
304 | #define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT) | 307 | #define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT) |
305 | #define XFS_SB_IFREE XFS_SB_MVAL(IFREE) | 308 | #define XFS_SB_IFREE XFS_SB_MVAL(IFREE) |
306 | #define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS) | 309 | #define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS) |
307 | #define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2) | 310 | #define XFS_SB_FEATURES2 (XFS_SB_MVAL(FEATURES2) | \ |
308 | #define XFS_SB_BAD_FEATURES2 XFS_SB_MVAL(BAD_FEATURES2) | 311 | XFS_SB_MVAL(BAD_FEATURES2)) |
309 | #define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT) | 312 | #define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT) |
310 | #define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT) | 313 | #define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT) |
311 | #define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT) | 314 | #define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT) |
@@ -319,9 +322,9 @@ typedef enum { | |||
319 | XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ | 322 | XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ |
320 | XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ | 323 | XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ |
321 | XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \ | 324 | XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \ |
322 | XFS_SB_BAD_FEATURES2 | XFS_SB_FEATURES_COMPAT | \ | 325 | XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \ |
323 | XFS_SB_FEATURES_RO_COMPAT | XFS_SB_FEATURES_INCOMPAT | \ | 326 | XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \ |
324 | XFS_SB_FEATURES_LOG_INCOMPAT | XFS_SB_PQUOTINO) | 327 | XFS_SB_PQUOTINO) |
325 | 328 | ||
326 | 329 | ||
327 | /* | 330 | /* |
@@ -453,13 +456,11 @@ static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp) | |||
453 | { | 456 | { |
454 | sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; | 457 | sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; |
455 | sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT; | 458 | sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT; |
456 | sbp->sb_bad_features2 |= XFS_SB_VERSION2_ATTR2BIT; | ||
457 | } | 459 | } |
458 | 460 | ||
459 | static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp) | 461 | static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp) |
460 | { | 462 | { |
461 | sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; | 463 | sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; |
462 | sbp->sb_bad_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; | ||
463 | if (!sbp->sb_features2) | 464 | if (!sbp->sb_features2) |
464 | sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; | 465 | sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; |
465 | } | 466 | } |
@@ -475,7 +476,6 @@ static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp) | |||
475 | { | 476 | { |
476 | sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; | 477 | sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; |
477 | sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT; | 478 | sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT; |
478 | sbp->sb_bad_features2 |= XFS_SB_VERSION2_PROJID32BIT; | ||
479 | } | 479 | } |
480 | 480 | ||
481 | /* | 481 | /* |
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 18dc721ca19f..18dc721ca19f 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h | |||
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 752915fa775a..b0a5fe95a3e2 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c | |||
@@ -40,69 +40,6 @@ | |||
40 | * Physical superblock buffer manipulations. Shared with libxfs in userspace. | 40 | * Physical superblock buffer manipulations. Shared with libxfs in userspace. |
41 | */ | 41 | */ |
42 | 42 | ||
43 | static const struct { | ||
44 | short offset; | ||
45 | short type; /* 0 = integer | ||
46 | * 1 = binary / string (no translation) | ||
47 | */ | ||
48 | } xfs_sb_info[] = { | ||
49 | { offsetof(xfs_sb_t, sb_magicnum), 0 }, | ||
50 | { offsetof(xfs_sb_t, sb_blocksize), 0 }, | ||
51 | { offsetof(xfs_sb_t, sb_dblocks), 0 }, | ||
52 | { offsetof(xfs_sb_t, sb_rblocks), 0 }, | ||
53 | { offsetof(xfs_sb_t, sb_rextents), 0 }, | ||
54 | { offsetof(xfs_sb_t, sb_uuid), 1 }, | ||
55 | { offsetof(xfs_sb_t, sb_logstart), 0 }, | ||
56 | { offsetof(xfs_sb_t, sb_rootino), 0 }, | ||
57 | { offsetof(xfs_sb_t, sb_rbmino), 0 }, | ||
58 | { offsetof(xfs_sb_t, sb_rsumino), 0 }, | ||
59 | { offsetof(xfs_sb_t, sb_rextsize), 0 }, | ||
60 | { offsetof(xfs_sb_t, sb_agblocks), 0 }, | ||
61 | { offsetof(xfs_sb_t, sb_agcount), 0 }, | ||
62 | { offsetof(xfs_sb_t, sb_rbmblocks), 0 }, | ||
63 | { offsetof(xfs_sb_t, sb_logblocks), 0 }, | ||
64 | { offsetof(xfs_sb_t, sb_versionnum), 0 }, | ||
65 | { offsetof(xfs_sb_t, sb_sectsize), 0 }, | ||
66 | { offsetof(xfs_sb_t, sb_inodesize), 0 }, | ||
67 | { offsetof(xfs_sb_t, sb_inopblock), 0 }, | ||
68 | { offsetof(xfs_sb_t, sb_fname[0]), 1 }, | ||
69 | { offsetof(xfs_sb_t, sb_blocklog), 0 }, | ||
70 | { offsetof(xfs_sb_t, sb_sectlog), 0 }, | ||
71 | { offsetof(xfs_sb_t, sb_inodelog), 0 }, | ||
72 | { offsetof(xfs_sb_t, sb_inopblog), 0 }, | ||
73 | { offsetof(xfs_sb_t, sb_agblklog), 0 }, | ||
74 | { offsetof(xfs_sb_t, sb_rextslog), 0 }, | ||
75 | { offsetof(xfs_sb_t, sb_inprogress), 0 }, | ||
76 | { offsetof(xfs_sb_t, sb_imax_pct), 0 }, | ||
77 | { offsetof(xfs_sb_t, sb_icount), 0 }, | ||
78 | { offsetof(xfs_sb_t, sb_ifree), 0 }, | ||
79 | { offsetof(xfs_sb_t, sb_fdblocks), 0 }, | ||
80 | { offsetof(xfs_sb_t, sb_frextents), 0 }, | ||
81 | { offsetof(xfs_sb_t, sb_uquotino), 0 }, | ||
82 | { offsetof(xfs_sb_t, sb_gquotino), 0 }, | ||
83 | { offsetof(xfs_sb_t, sb_qflags), 0 }, | ||
84 | { offsetof(xfs_sb_t, sb_flags), 0 }, | ||
85 | { offsetof(xfs_sb_t, sb_shared_vn), 0 }, | ||
86 | { offsetof(xfs_sb_t, sb_inoalignmt), 0 }, | ||
87 | { offsetof(xfs_sb_t, sb_unit), 0 }, | ||
88 | { offsetof(xfs_sb_t, sb_width), 0 }, | ||
89 | { offsetof(xfs_sb_t, sb_dirblklog), 0 }, | ||
90 | { offsetof(xfs_sb_t, sb_logsectlog), 0 }, | ||
91 | { offsetof(xfs_sb_t, sb_logsectsize), 0 }, | ||
92 | { offsetof(xfs_sb_t, sb_logsunit), 0 }, | ||
93 | { offsetof(xfs_sb_t, sb_features2), 0 }, | ||
94 | { offsetof(xfs_sb_t, sb_bad_features2), 0 }, | ||
95 | { offsetof(xfs_sb_t, sb_features_compat), 0 }, | ||
96 | { offsetof(xfs_sb_t, sb_features_ro_compat), 0 }, | ||
97 | { offsetof(xfs_sb_t, sb_features_incompat), 0 }, | ||
98 | { offsetof(xfs_sb_t, sb_features_log_incompat), 0 }, | ||
99 | { offsetof(xfs_sb_t, sb_crc), 0 }, | ||
100 | { offsetof(xfs_sb_t, sb_pad), 0 }, | ||
101 | { offsetof(xfs_sb_t, sb_pquotino), 0 }, | ||
102 | { offsetof(xfs_sb_t, sb_lsn), 0 }, | ||
103 | { sizeof(xfs_sb_t), 0 } | ||
104 | }; | ||
105 | |||
106 | /* | 43 | /* |
107 | * Reference counting access wrappers to the perag structures. | 44 | * Reference counting access wrappers to the perag structures. |
108 | * Because we never free per-ag structures, the only thing we | 45 | * Because we never free per-ag structures, the only thing we |
@@ -461,58 +398,49 @@ xfs_sb_from_disk( | |||
461 | __xfs_sb_from_disk(to, from, true); | 398 | __xfs_sb_from_disk(to, from, true); |
462 | } | 399 | } |
463 | 400 | ||
464 | static inline void | 401 | static void |
465 | xfs_sb_quota_to_disk( | 402 | xfs_sb_quota_to_disk( |
466 | xfs_dsb_t *to, | 403 | struct xfs_dsb *to, |
467 | xfs_sb_t *from, | 404 | struct xfs_sb *from) |
468 | __int64_t *fields) | ||
469 | { | 405 | { |
470 | __uint16_t qflags = from->sb_qflags; | 406 | __uint16_t qflags = from->sb_qflags; |
471 | 407 | ||
408 | to->sb_uquotino = cpu_to_be64(from->sb_uquotino); | ||
409 | if (xfs_sb_version_has_pquotino(from)) { | ||
410 | to->sb_qflags = cpu_to_be16(from->sb_qflags); | ||
411 | to->sb_gquotino = cpu_to_be64(from->sb_gquotino); | ||
412 | to->sb_pquotino = cpu_to_be64(from->sb_pquotino); | ||
413 | return; | ||
414 | } | ||
415 | |||
472 | /* | 416 | /* |
473 | * We need to do these manipilations only if we are working | 417 | * The in-core version of sb_qflags do not have XFS_OQUOTA_* |
474 | * with an older version of on-disk superblock. | 418 | * flags, whereas the on-disk version does. So, convert incore |
419 | * XFS_{PG}QUOTA_* flags to on-disk XFS_OQUOTA_* flags. | ||
475 | */ | 420 | */ |
476 | if (xfs_sb_version_has_pquotino(from)) | 421 | qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | |
477 | return; | 422 | XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); |
478 | 423 | ||
479 | if (*fields & XFS_SB_QFLAGS) { | 424 | if (from->sb_qflags & |
480 | /* | 425 | (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD)) |
481 | * The in-core version of sb_qflags do not have | 426 | qflags |= XFS_OQUOTA_ENFD; |
482 | * XFS_OQUOTA_* flags, whereas the on-disk version | 427 | if (from->sb_qflags & |
483 | * does. So, convert incore XFS_{PG}QUOTA_* flags | 428 | (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) |
484 | * to on-disk XFS_OQUOTA_* flags. | 429 | qflags |= XFS_OQUOTA_CHKD; |
485 | */ | 430 | to->sb_qflags = cpu_to_be16(qflags); |
486 | qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | | ||
487 | XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); | ||
488 | |||
489 | if (from->sb_qflags & | ||
490 | (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD)) | ||
491 | qflags |= XFS_OQUOTA_ENFD; | ||
492 | if (from->sb_qflags & | ||
493 | (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) | ||
494 | qflags |= XFS_OQUOTA_CHKD; | ||
495 | to->sb_qflags = cpu_to_be16(qflags); | ||
496 | *fields &= ~XFS_SB_QFLAGS; | ||
497 | } | ||
498 | 431 | ||
499 | /* | 432 | /* |
500 | * GQUOTINO and PQUOTINO cannot be used together in versions of | 433 | * GQUOTINO and PQUOTINO cannot be used together in versions |
501 | * superblock that do not have pquotino. from->sb_flags tells us which | 434 | * of superblock that do not have pquotino. from->sb_flags |
502 | * quota is active and should be copied to disk. If neither are active, | 435 | * tells us which quota is active and should be copied to |
503 | * make sure we write NULLFSINO to the sb_gquotino field as a quota | 436 | * disk. If neither are active, we should NULL the inode. |
504 | * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature | ||
505 | * bit is set. | ||
506 | * | 437 | * |
507 | * Note that we don't need to handle the sb_uquotino or sb_pquotino here | 438 | * In all cases, the separate pquotino must remain 0 because it |
508 | * as they do not require any translation. Hence the main sb field loop | 439 | * it beyond the "end" of the valid non-pquotino superblock. |
509 | * will write them appropriately from the in-core superblock. | ||
510 | */ | 440 | */ |
511 | if ((*fields & XFS_SB_GQUOTINO) && | 441 | if (from->sb_qflags & XFS_GQUOTA_ACCT) |
512 | (from->sb_qflags & XFS_GQUOTA_ACCT)) | ||
513 | to->sb_gquotino = cpu_to_be64(from->sb_gquotino); | 442 | to->sb_gquotino = cpu_to_be64(from->sb_gquotino); |
514 | else if ((*fields & XFS_SB_PQUOTINO) && | 443 | else if (from->sb_qflags & XFS_PQUOTA_ACCT) |
515 | (from->sb_qflags & XFS_PQUOTA_ACCT)) | ||
516 | to->sb_gquotino = cpu_to_be64(from->sb_pquotino); | 444 | to->sb_gquotino = cpu_to_be64(from->sb_pquotino); |
517 | else { | 445 | else { |
518 | /* | 446 | /* |
@@ -526,63 +454,78 @@ xfs_sb_quota_to_disk( | |||
526 | to->sb_gquotino = cpu_to_be64(NULLFSINO); | 454 | to->sb_gquotino = cpu_to_be64(NULLFSINO); |
527 | } | 455 | } |
528 | 456 | ||
529 | *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO); | 457 | to->sb_pquotino = 0; |
530 | } | 458 | } |
531 | 459 | ||
532 | /* | ||
533 | * Copy in core superblock to ondisk one. | ||
534 | * | ||
535 | * The fields argument is mask of superblock fields to copy. | ||
536 | */ | ||
537 | void | 460 | void |
538 | xfs_sb_to_disk( | 461 | xfs_sb_to_disk( |
539 | xfs_dsb_t *to, | 462 | struct xfs_dsb *to, |
540 | xfs_sb_t *from, | 463 | struct xfs_sb *from) |
541 | __int64_t fields) | ||
542 | { | 464 | { |
543 | xfs_caddr_t to_ptr = (xfs_caddr_t)to; | 465 | xfs_sb_quota_to_disk(to, from); |
544 | xfs_caddr_t from_ptr = (xfs_caddr_t)from; | ||
545 | xfs_sb_field_t f; | ||
546 | int first; | ||
547 | int size; | ||
548 | |||
549 | ASSERT(fields); | ||
550 | if (!fields) | ||
551 | return; | ||
552 | 466 | ||
553 | /* We should never write the crc here, it's updated in the IO path */ | 467 | to->sb_magicnum = cpu_to_be32(from->sb_magicnum); |
554 | fields &= ~XFS_SB_CRC; | 468 | to->sb_blocksize = cpu_to_be32(from->sb_blocksize); |
555 | 469 | to->sb_dblocks = cpu_to_be64(from->sb_dblocks); | |
556 | xfs_sb_quota_to_disk(to, from, &fields); | 470 | to->sb_rblocks = cpu_to_be64(from->sb_rblocks); |
557 | while (fields) { | 471 | to->sb_rextents = cpu_to_be64(from->sb_rextents); |
558 | f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); | 472 | memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid)); |
559 | first = xfs_sb_info[f].offset; | 473 | to->sb_logstart = cpu_to_be64(from->sb_logstart); |
560 | size = xfs_sb_info[f + 1].offset - first; | 474 | to->sb_rootino = cpu_to_be64(from->sb_rootino); |
561 | 475 | to->sb_rbmino = cpu_to_be64(from->sb_rbmino); | |
562 | ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1); | 476 | to->sb_rsumino = cpu_to_be64(from->sb_rsumino); |
563 | 477 | to->sb_rextsize = cpu_to_be32(from->sb_rextsize); | |
564 | if (size == 1 || xfs_sb_info[f].type == 1) { | 478 | to->sb_agblocks = cpu_to_be32(from->sb_agblocks); |
565 | memcpy(to_ptr + first, from_ptr + first, size); | 479 | to->sb_agcount = cpu_to_be32(from->sb_agcount); |
566 | } else { | 480 | to->sb_rbmblocks = cpu_to_be32(from->sb_rbmblocks); |
567 | switch (size) { | 481 | to->sb_logblocks = cpu_to_be32(from->sb_logblocks); |
568 | case 2: | 482 | to->sb_versionnum = cpu_to_be16(from->sb_versionnum); |
569 | *(__be16 *)(to_ptr + first) = | 483 | to->sb_sectsize = cpu_to_be16(from->sb_sectsize); |
570 | cpu_to_be16(*(__u16 *)(from_ptr + first)); | 484 | to->sb_inodesize = cpu_to_be16(from->sb_inodesize); |
571 | break; | 485 | to->sb_inopblock = cpu_to_be16(from->sb_inopblock); |
572 | case 4: | 486 | memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname)); |
573 | *(__be32 *)(to_ptr + first) = | 487 | to->sb_blocklog = from->sb_blocklog; |
574 | cpu_to_be32(*(__u32 *)(from_ptr + first)); | 488 | to->sb_sectlog = from->sb_sectlog; |
575 | break; | 489 | to->sb_inodelog = from->sb_inodelog; |
576 | case 8: | 490 | to->sb_inopblog = from->sb_inopblog; |
577 | *(__be64 *)(to_ptr + first) = | 491 | to->sb_agblklog = from->sb_agblklog; |
578 | cpu_to_be64(*(__u64 *)(from_ptr + first)); | 492 | to->sb_rextslog = from->sb_rextslog; |
579 | break; | 493 | to->sb_inprogress = from->sb_inprogress; |
580 | default: | 494 | to->sb_imax_pct = from->sb_imax_pct; |
581 | ASSERT(0); | 495 | to->sb_icount = cpu_to_be64(from->sb_icount); |
582 | } | 496 | to->sb_ifree = cpu_to_be64(from->sb_ifree); |
583 | } | 497 | to->sb_fdblocks = cpu_to_be64(from->sb_fdblocks); |
498 | to->sb_frextents = cpu_to_be64(from->sb_frextents); | ||
584 | 499 | ||
585 | fields &= ~(1LL << f); | 500 | to->sb_flags = from->sb_flags; |
501 | to->sb_shared_vn = from->sb_shared_vn; | ||
502 | to->sb_inoalignmt = cpu_to_be32(from->sb_inoalignmt); | ||
503 | to->sb_unit = cpu_to_be32(from->sb_unit); | ||
504 | to->sb_width = cpu_to_be32(from->sb_width); | ||
505 | to->sb_dirblklog = from->sb_dirblklog; | ||
506 | to->sb_logsectlog = from->sb_logsectlog; | ||
507 | to->sb_logsectsize = cpu_to_be16(from->sb_logsectsize); | ||
508 | to->sb_logsunit = cpu_to_be32(from->sb_logsunit); | ||
509 | |||
510 | /* | ||
511 | * We need to ensure that bad_features2 always matches features2. | ||
512 | * Hence we enforce that here rather than having to remember to do it | ||
513 | * everywhere else that updates features2. | ||
514 | */ | ||
515 | from->sb_bad_features2 = from->sb_features2; | ||
516 | to->sb_features2 = cpu_to_be32(from->sb_features2); | ||
517 | to->sb_bad_features2 = cpu_to_be32(from->sb_bad_features2); | ||
518 | |||
519 | if (xfs_sb_version_hascrc(from)) { | ||
520 | to->sb_features_compat = cpu_to_be32(from->sb_features_compat); | ||
521 | to->sb_features_ro_compat = | ||
522 | cpu_to_be32(from->sb_features_ro_compat); | ||
523 | to->sb_features_incompat = | ||
524 | cpu_to_be32(from->sb_features_incompat); | ||
525 | to->sb_features_log_incompat = | ||
526 | cpu_to_be32(from->sb_features_log_incompat); | ||
527 | to->sb_pad = 0; | ||
528 | to->sb_lsn = cpu_to_be64(from->sb_lsn); | ||
586 | } | 529 | } |
587 | } | 530 | } |
588 | 531 | ||
@@ -816,42 +759,51 @@ xfs_initialize_perag_data( | |||
816 | } | 759 | } |
817 | 760 | ||
818 | /* | 761 | /* |
819 | * xfs_mod_sb() can be used to copy arbitrary changes to the | 762 | * xfs_log_sb() can be used to copy arbitrary changes to the in-core superblock |
820 | * in-core superblock into the superblock buffer to be logged. | 763 | * into the superblock buffer to be logged. It does not provide the higher |
821 | * It does not provide the higher level of locking that is | 764 | * level of locking that is needed to protect the in-core superblock from |
822 | * needed to protect the in-core superblock from concurrent | 765 | * concurrent access. |
823 | * access. | ||
824 | */ | 766 | */ |
825 | void | 767 | void |
826 | xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) | 768 | xfs_log_sb( |
769 | struct xfs_trans *tp) | ||
827 | { | 770 | { |
828 | xfs_buf_t *bp; | 771 | struct xfs_mount *mp = tp->t_mountp; |
829 | int first; | 772 | struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); |
830 | int last; | ||
831 | xfs_mount_t *mp; | ||
832 | xfs_sb_field_t f; | ||
833 | |||
834 | ASSERT(fields); | ||
835 | if (!fields) | ||
836 | return; | ||
837 | mp = tp->t_mountp; | ||
838 | bp = xfs_trans_getsb(tp, mp, 0); | ||
839 | first = sizeof(xfs_sb_t); | ||
840 | last = 0; | ||
841 | |||
842 | /* translate/copy */ | ||
843 | 773 | ||
844 | xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields); | 774 | xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); |
775 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); | ||
776 | xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb)); | ||
777 | } | ||
845 | 778 | ||
846 | /* find modified range */ | 779 | /* |
847 | f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields); | 780 | * xfs_sync_sb |
848 | ASSERT((1LL << f) & XFS_SB_MOD_BITS); | 781 | * |
849 | last = xfs_sb_info[f + 1].offset - 1; | 782 | * Sync the superblock to disk. |
783 | * | ||
784 | * Note that the caller is responsible for checking the frozen state of the | ||
785 | * filesystem. This procedure uses the non-blocking transaction allocator and | ||
786 | * thus will allow modifications to a frozen fs. This is required because this | ||
787 | * code can be called during the process of freezing where use of the high-level | ||
788 | * allocator would deadlock. | ||
789 | */ | ||
790 | int | ||
791 | xfs_sync_sb( | ||
792 | struct xfs_mount *mp, | ||
793 | bool wait) | ||
794 | { | ||
795 | struct xfs_trans *tp; | ||
796 | int error; | ||
850 | 797 | ||
851 | f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); | 798 | tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP); |
852 | ASSERT((1LL << f) & XFS_SB_MOD_BITS); | 799 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); |
853 | first = xfs_sb_info[f].offset; | 800 | if (error) { |
801 | xfs_trans_cancel(tp, 0); | ||
802 | return error; | ||
803 | } | ||
854 | 804 | ||
855 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); | 805 | xfs_log_sb(tp); |
856 | xfs_trans_log_buf(tp, bp, first, last); | 806 | if (wait) |
807 | xfs_trans_set_sync(tp); | ||
808 | return xfs_trans_commit(tp, 0); | ||
857 | } | 809 | } |
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 8eb1c54bafbf..b25bb9a343f3 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h | |||
@@ -27,11 +27,12 @@ extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t, | |||
27 | extern void xfs_perag_put(struct xfs_perag *pag); | 27 | extern void xfs_perag_put(struct xfs_perag *pag); |
28 | extern int xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t); | 28 | extern int xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t); |
29 | 29 | ||
30 | extern void xfs_sb_calc_crc(struct xfs_buf *); | 30 | extern void xfs_sb_calc_crc(struct xfs_buf *bp); |
31 | extern void xfs_mod_sb(struct xfs_trans *, __int64_t); | 31 | extern void xfs_log_sb(struct xfs_trans *tp); |
32 | extern void xfs_sb_mount_common(struct xfs_mount *, struct xfs_sb *); | 32 | extern int xfs_sync_sb(struct xfs_mount *mp, bool wait); |
33 | extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); | 33 | extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp); |
34 | extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); | 34 | extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from); |
35 | extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); | ||
35 | extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); | 36 | extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); |
36 | 37 | ||
37 | #endif /* __XFS_SB_H__ */ | 38 | #endif /* __XFS_SB_H__ */ |
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 82404da2ca67..8dda4b321343 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h | |||
@@ -82,7 +82,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; | |||
82 | #define XFS_TRANS_ATTR_RM 23 | 82 | #define XFS_TRANS_ATTR_RM 23 |
83 | #define XFS_TRANS_ATTR_FLAG 24 | 83 | #define XFS_TRANS_ATTR_FLAG 24 |
84 | #define XFS_TRANS_CLEAR_AGI_BUCKET 25 | 84 | #define XFS_TRANS_CLEAR_AGI_BUCKET 25 |
85 | #define XFS_TRANS_QM_SBCHANGE 26 | 85 | #define XFS_TRANS_SB_CHANGE 26 |
86 | /* | 86 | /* |
87 | * Dummy entries since we use the transaction type to index into the | 87 | * Dummy entries since we use the transaction type to index into the |
88 | * trans_type[] in xlog_recover_print_trans_head() | 88 | * trans_type[] in xlog_recover_print_trans_head() |
@@ -95,17 +95,15 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; | |||
95 | #define XFS_TRANS_QM_DQCLUSTER 32 | 95 | #define XFS_TRANS_QM_DQCLUSTER 32 |
96 | #define XFS_TRANS_QM_QINOCREATE 33 | 96 | #define XFS_TRANS_QM_QINOCREATE 33 |
97 | #define XFS_TRANS_QM_QUOTAOFF_END 34 | 97 | #define XFS_TRANS_QM_QUOTAOFF_END 34 |
98 | #define XFS_TRANS_SB_UNIT 35 | 98 | #define XFS_TRANS_FSYNC_TS 35 |
99 | #define XFS_TRANS_FSYNC_TS 36 | 99 | #define XFS_TRANS_GROWFSRT_ALLOC 36 |
100 | #define XFS_TRANS_GROWFSRT_ALLOC 37 | 100 | #define XFS_TRANS_GROWFSRT_ZERO 37 |
101 | #define XFS_TRANS_GROWFSRT_ZERO 38 | 101 | #define XFS_TRANS_GROWFSRT_FREE 38 |
102 | #define XFS_TRANS_GROWFSRT_FREE 39 | 102 | #define XFS_TRANS_SWAPEXT 39 |
103 | #define XFS_TRANS_SWAPEXT 40 | 103 | #define XFS_TRANS_CHECKPOINT 40 |
104 | #define XFS_TRANS_SB_COUNT 41 | 104 | #define XFS_TRANS_ICREATE 41 |
105 | #define XFS_TRANS_CHECKPOINT 42 | 105 | #define XFS_TRANS_CREATE_TMPFILE 42 |
106 | #define XFS_TRANS_ICREATE 43 | 106 | #define XFS_TRANS_TYPE_MAX 43 |
107 | #define XFS_TRANS_CREATE_TMPFILE 44 | ||
108 | #define XFS_TRANS_TYPE_MAX 44 | ||
109 | /* new transaction types need to be reflected in xfs_logprint(8) */ | 107 | /* new transaction types need to be reflected in xfs_logprint(8) */ |
110 | 108 | ||
111 | #define XFS_TRANS_TYPES \ | 109 | #define XFS_TRANS_TYPES \ |
@@ -113,7 +111,6 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; | |||
113 | { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ | 111 | { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ |
114 | { XFS_TRANS_INACTIVE, "INACTIVE" }, \ | 112 | { XFS_TRANS_INACTIVE, "INACTIVE" }, \ |
115 | { XFS_TRANS_CREATE, "CREATE" }, \ | 113 | { XFS_TRANS_CREATE, "CREATE" }, \ |
116 | { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \ | ||
117 | { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ | 114 | { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ |
118 | { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ | 115 | { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ |
119 | { XFS_TRANS_REMOVE, "REMOVE" }, \ | 116 | { XFS_TRANS_REMOVE, "REMOVE" }, \ |
@@ -134,23 +131,23 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; | |||
134 | { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \ | 131 | { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \ |
135 | { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \ | 132 | { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \ |
136 | { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \ | 133 | { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \ |
137 | { XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \ | 134 | { XFS_TRANS_SB_CHANGE, "SBCHANGE" }, \ |
135 | { XFS_TRANS_DUMMY1, "DUMMY1" }, \ | ||
136 | { XFS_TRANS_DUMMY2, "DUMMY2" }, \ | ||
138 | { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \ | 137 | { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \ |
139 | { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \ | 138 | { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \ |
140 | { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \ | 139 | { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \ |
141 | { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \ | 140 | { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \ |
142 | { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \ | 141 | { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \ |
143 | { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \ | 142 | { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \ |
144 | { XFS_TRANS_SB_UNIT, "SB_UNIT" }, \ | ||
145 | { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \ | 143 | { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \ |
146 | { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \ | 144 | { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \ |
147 | { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \ | 145 | { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \ |
148 | { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ | 146 | { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ |
149 | { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ | 147 | { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ |
150 | { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ | ||
151 | { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \ | 148 | { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \ |
152 | { XFS_TRANS_DUMMY1, "DUMMY1" }, \ | 149 | { XFS_TRANS_ICREATE, "ICREATE" }, \ |
153 | { XFS_TRANS_DUMMY2, "DUMMY2" }, \ | 150 | { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \ |
154 | { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } | 151 | { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } |
155 | 152 | ||
156 | /* | 153 | /* |
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index c80c5236c3da..e7e26bd6468f 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c | |||
@@ -178,6 +178,8 @@ xfs_symlink_local_to_remote( | |||
178 | struct xfs_mount *mp = ip->i_mount; | 178 | struct xfs_mount *mp = ip->i_mount; |
179 | char *buf; | 179 | char *buf; |
180 | 180 | ||
181 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SYMLINK_BUF); | ||
182 | |||
181 | if (!xfs_sb_version_hascrc(&mp->m_sb)) { | 183 | if (!xfs_sb_version_hascrc(&mp->m_sb)) { |
182 | bp->b_ops = NULL; | 184 | bp->b_ops = NULL; |
183 | memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); | 185 | memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); |
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 6c1330f29050..68cb1e7bf2bb 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c | |||
@@ -716,17 +716,6 @@ xfs_calc_clear_agi_bucket_reservation( | |||
716 | } | 716 | } |
717 | 717 | ||
718 | /* | 718 | /* |
719 | * Clearing the quotaflags in the superblock. | ||
720 | * the super block for changing quota flags: sector size | ||
721 | */ | ||
722 | STATIC uint | ||
723 | xfs_calc_qm_sbchange_reservation( | ||
724 | struct xfs_mount *mp) | ||
725 | { | ||
726 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); | ||
727 | } | ||
728 | |||
729 | /* | ||
730 | * Adjusting quota limits. | 719 | * Adjusting quota limits. |
731 | * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot) | 720 | * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot) |
732 | */ | 721 | */ |
@@ -864,9 +853,6 @@ xfs_trans_resv_calc( | |||
864 | * The following transactions are logged in logical format with | 853 | * The following transactions are logged in logical format with |
865 | * a default log count. | 854 | * a default log count. |
866 | */ | 855 | */ |
867 | resp->tr_qm_sbchange.tr_logres = xfs_calc_qm_sbchange_reservation(mp); | ||
868 | resp->tr_qm_sbchange.tr_logcount = XFS_DEFAULT_LOG_COUNT; | ||
869 | |||
870 | resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp); | 856 | resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp); |
871 | resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT; | 857 | resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT; |
872 | 858 | ||
diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h index 1097d14cd583..2d5bdfce6d8f 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.h +++ b/fs/xfs/libxfs/xfs_trans_resv.h | |||
@@ -56,7 +56,6 @@ struct xfs_trans_resv { | |||
56 | struct xfs_trans_res tr_growrtalloc; /* grow realtime allocations */ | 56 | struct xfs_trans_res tr_growrtalloc; /* grow realtime allocations */ |
57 | struct xfs_trans_res tr_growrtzero; /* grow realtime zeroing */ | 57 | struct xfs_trans_res tr_growrtzero; /* grow realtime zeroing */ |
58 | struct xfs_trans_res tr_growrtfree; /* grow realtime freeing */ | 58 | struct xfs_trans_res tr_growrtfree; /* grow realtime freeing */ |
59 | struct xfs_trans_res tr_qm_sbchange; /* change quota flags */ | ||
60 | struct xfs_trans_res tr_qm_setqlim; /* adjust quota limits */ | 59 | struct xfs_trans_res tr_qm_setqlim; /* adjust quota limits */ |
61 | struct xfs_trans_res tr_qm_dqalloc; /* allocate quota on disk */ | 60 | struct xfs_trans_res tr_qm_dqalloc; /* allocate quota on disk */ |
62 | struct xfs_trans_res tr_qm_quotaoff; /* turn quota off */ | 61 | struct xfs_trans_res tr_qm_quotaoff; /* turn quota off */ |
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index b79dc66b2ecd..b79dc66b2ecd 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h | |||
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 18e2f3bbae5e..3a9b7a1b8704 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -135,30 +135,22 @@ xfs_setfilesize_trans_alloc( | |||
135 | */ | 135 | */ |
136 | STATIC int | 136 | STATIC int |
137 | xfs_setfilesize( | 137 | xfs_setfilesize( |
138 | struct xfs_ioend *ioend) | 138 | struct xfs_inode *ip, |
139 | struct xfs_trans *tp, | ||
140 | xfs_off_t offset, | ||
141 | size_t size) | ||
139 | { | 142 | { |
140 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | ||
141 | struct xfs_trans *tp = ioend->io_append_trans; | ||
142 | xfs_fsize_t isize; | 143 | xfs_fsize_t isize; |
143 | 144 | ||
144 | /* | ||
145 | * The transaction may have been allocated in the I/O submission thread, | ||
146 | * thus we need to mark ourselves as beeing in a transaction manually. | ||
147 | * Similarly for freeze protection. | ||
148 | */ | ||
149 | current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); | ||
150 | rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
151 | 0, 1, _THIS_IP_); | ||
152 | |||
153 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 145 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
154 | isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); | 146 | isize = xfs_new_eof(ip, offset + size); |
155 | if (!isize) { | 147 | if (!isize) { |
156 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 148 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
157 | xfs_trans_cancel(tp, 0); | 149 | xfs_trans_cancel(tp, 0); |
158 | return 0; | 150 | return 0; |
159 | } | 151 | } |
160 | 152 | ||
161 | trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); | 153 | trace_xfs_setfilesize(ip, offset, size); |
162 | 154 | ||
163 | ip->i_d.di_size = isize; | 155 | ip->i_d.di_size = isize; |
164 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | 156 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
@@ -167,6 +159,25 @@ xfs_setfilesize( | |||
167 | return xfs_trans_commit(tp, 0); | 159 | return xfs_trans_commit(tp, 0); |
168 | } | 160 | } |
169 | 161 | ||
162 | STATIC int | ||
163 | xfs_setfilesize_ioend( | ||
164 | struct xfs_ioend *ioend) | ||
165 | { | ||
166 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | ||
167 | struct xfs_trans *tp = ioend->io_append_trans; | ||
168 | |||
169 | /* | ||
170 | * The transaction may have been allocated in the I/O submission thread, | ||
171 | * thus we need to mark ourselves as being in a transaction manually. | ||
172 | * Similarly for freeze protection. | ||
173 | */ | ||
174 | current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); | ||
175 | rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
176 | 0, 1, _THIS_IP_); | ||
177 | |||
178 | return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); | ||
179 | } | ||
180 | |||
170 | /* | 181 | /* |
171 | * Schedule IO completion handling on the final put of an ioend. | 182 | * Schedule IO completion handling on the final put of an ioend. |
172 | * | 183 | * |
@@ -182,8 +193,7 @@ xfs_finish_ioend( | |||
182 | 193 | ||
183 | if (ioend->io_type == XFS_IO_UNWRITTEN) | 194 | if (ioend->io_type == XFS_IO_UNWRITTEN) |
184 | queue_work(mp->m_unwritten_workqueue, &ioend->io_work); | 195 | queue_work(mp->m_unwritten_workqueue, &ioend->io_work); |
185 | else if (ioend->io_append_trans || | 196 | else if (ioend->io_append_trans) |
186 | (ioend->io_isdirect && xfs_ioend_is_append(ioend))) | ||
187 | queue_work(mp->m_data_workqueue, &ioend->io_work); | 197 | queue_work(mp->m_data_workqueue, &ioend->io_work); |
188 | else | 198 | else |
189 | xfs_destroy_ioend(ioend); | 199 | xfs_destroy_ioend(ioend); |
@@ -215,22 +225,8 @@ xfs_end_io( | |||
215 | if (ioend->io_type == XFS_IO_UNWRITTEN) { | 225 | if (ioend->io_type == XFS_IO_UNWRITTEN) { |
216 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, | 226 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, |
217 | ioend->io_size); | 227 | ioend->io_size); |
218 | } else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) { | ||
219 | /* | ||
220 | * For direct I/O we do not know if we need to allocate blocks | ||
221 | * or not so we can't preallocate an append transaction as that | ||
222 | * results in nested reservations and log space deadlocks. Hence | ||
223 | * allocate the transaction here. While this is sub-optimal and | ||
224 | * can block IO completion for some time, we're stuck with doing | ||
225 | * it this way until we can pass the ioend to the direct IO | ||
226 | * allocation callbacks and avoid nesting that way. | ||
227 | */ | ||
228 | error = xfs_setfilesize_trans_alloc(ioend); | ||
229 | if (error) | ||
230 | goto done; | ||
231 | error = xfs_setfilesize(ioend); | ||
232 | } else if (ioend->io_append_trans) { | 228 | } else if (ioend->io_append_trans) { |
233 | error = xfs_setfilesize(ioend); | 229 | error = xfs_setfilesize_ioend(ioend); |
234 | } else { | 230 | } else { |
235 | ASSERT(!xfs_ioend_is_append(ioend)); | 231 | ASSERT(!xfs_ioend_is_append(ioend)); |
236 | } | 232 | } |
@@ -242,17 +238,6 @@ done: | |||
242 | } | 238 | } |
243 | 239 | ||
244 | /* | 240 | /* |
245 | * Call IO completion handling in caller context on the final put of an ioend. | ||
246 | */ | ||
247 | STATIC void | ||
248 | xfs_finish_ioend_sync( | ||
249 | struct xfs_ioend *ioend) | ||
250 | { | ||
251 | if (atomic_dec_and_test(&ioend->io_remaining)) | ||
252 | xfs_end_io(&ioend->io_work); | ||
253 | } | ||
254 | |||
255 | /* | ||
256 | * Allocate and initialise an IO completion structure. | 241 | * Allocate and initialise an IO completion structure. |
257 | * We need to track unwritten extent write completion here initially. | 242 | * We need to track unwritten extent write completion here initially. |
258 | * We'll need to extend this for updating the ondisk inode size later | 243 | * We'll need to extend this for updating the ondisk inode size later |
@@ -273,7 +258,6 @@ xfs_alloc_ioend( | |||
273 | * all the I/O from calling the completion routine too early. | 258 | * all the I/O from calling the completion routine too early. |
274 | */ | 259 | */ |
275 | atomic_set(&ioend->io_remaining, 1); | 260 | atomic_set(&ioend->io_remaining, 1); |
276 | ioend->io_isdirect = 0; | ||
277 | ioend->io_error = 0; | 261 | ioend->io_error = 0; |
278 | ioend->io_list = NULL; | 262 | ioend->io_list = NULL; |
279 | ioend->io_type = type; | 263 | ioend->io_type = type; |
@@ -1459,11 +1443,7 @@ xfs_get_blocks_direct( | |||
1459 | * | 1443 | * |
1460 | * If the private argument is non-NULL __xfs_get_blocks signals us that we | 1444 | * If the private argument is non-NULL __xfs_get_blocks signals us that we |
1461 | * need to issue a transaction to convert the range from unwritten to written | 1445 | * need to issue a transaction to convert the range from unwritten to written |
1462 | * extents. In case this is regular synchronous I/O we just call xfs_end_io | 1446 | * extents. |
1463 | * to do this and we are done. But in case this was a successful AIO | ||
1464 | * request this handler is called from interrupt context, from which we | ||
1465 | * can't start transactions. In that case offload the I/O completion to | ||
1466 | * the workqueues we also use for buffered I/O completion. | ||
1467 | */ | 1447 | */ |
1468 | STATIC void | 1448 | STATIC void |
1469 | xfs_end_io_direct_write( | 1449 | xfs_end_io_direct_write( |
@@ -1472,7 +1452,12 @@ xfs_end_io_direct_write( | |||
1472 | ssize_t size, | 1452 | ssize_t size, |
1473 | void *private) | 1453 | void *private) |
1474 | { | 1454 | { |
1475 | struct xfs_ioend *ioend = iocb->private; | 1455 | struct inode *inode = file_inode(iocb->ki_filp); |
1456 | struct xfs_inode *ip = XFS_I(inode); | ||
1457 | struct xfs_mount *mp = ip->i_mount; | ||
1458 | |||
1459 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1460 | return; | ||
1476 | 1461 | ||
1477 | /* | 1462 | /* |
1478 | * While the generic direct I/O code updates the inode size, it does | 1463 | * While the generic direct I/O code updates the inode size, it does |
@@ -1480,22 +1465,33 @@ xfs_end_io_direct_write( | |||
1480 | * end_io handler thinks the on-disk size is outside the in-core | 1465 | * end_io handler thinks the on-disk size is outside the in-core |
1481 | * size. To prevent this just update it a little bit earlier here. | 1466 | * size. To prevent this just update it a little bit earlier here. |
1482 | */ | 1467 | */ |
1483 | if (offset + size > i_size_read(ioend->io_inode)) | 1468 | if (offset + size > i_size_read(inode)) |
1484 | i_size_write(ioend->io_inode, offset + size); | 1469 | i_size_write(inode, offset + size); |
1485 | 1470 | ||
1486 | /* | 1471 | /* |
1487 | * blockdev_direct_IO can return an error even after the I/O | 1472 | * For direct I/O we do not know if we need to allocate blocks or not, |
1488 | * completion handler was called. Thus we need to protect | 1473 | * so we can't preallocate an append transaction, as that results in |
1489 | * against double-freeing. | 1474 | * nested reservations and log space deadlocks. Hence allocate the |
1475 | * transaction here. While this is sub-optimal and can block IO | ||
1476 | * completion for some time, we're stuck with doing it this way until | ||
1477 | * we can pass the ioend to the direct IO allocation callbacks and | ||
1478 | * avoid nesting that way. | ||
1490 | */ | 1479 | */ |
1491 | iocb->private = NULL; | 1480 | if (private && size > 0) { |
1492 | 1481 | xfs_iomap_write_unwritten(ip, offset, size); | |
1493 | ioend->io_offset = offset; | 1482 | } else if (offset + size > ip->i_d.di_size) { |
1494 | ioend->io_size = size; | 1483 | struct xfs_trans *tp; |
1495 | if (private && size > 0) | 1484 | int error; |
1496 | ioend->io_type = XFS_IO_UNWRITTEN; | 1485 | |
1486 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); | ||
1487 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); | ||
1488 | if (error) { | ||
1489 | xfs_trans_cancel(tp, 0); | ||
1490 | return; | ||
1491 | } | ||
1497 | 1492 | ||
1498 | xfs_finish_ioend_sync(ioend); | 1493 | xfs_setfilesize(ip, tp, offset, size); |
1494 | } | ||
1499 | } | 1495 | } |
1500 | 1496 | ||
1501 | STATIC ssize_t | 1497 | STATIC ssize_t |
@@ -1507,39 +1503,16 @@ xfs_vm_direct_IO( | |||
1507 | { | 1503 | { |
1508 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 1504 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
1509 | struct block_device *bdev = xfs_find_bdev_for_inode(inode); | 1505 | struct block_device *bdev = xfs_find_bdev_for_inode(inode); |
1510 | struct xfs_ioend *ioend = NULL; | ||
1511 | ssize_t ret; | ||
1512 | 1506 | ||
1513 | if (rw & WRITE) { | 1507 | if (rw & WRITE) { |
1514 | size_t size = iov_iter_count(iter); | 1508 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, |
1515 | |||
1516 | /* | ||
1517 | * We cannot preallocate a size update transaction here as we | ||
1518 | * don't know whether allocation is necessary or not. Hence we | ||
1519 | * can only tell IO completion that one is necessary if we are | ||
1520 | * not doing unwritten extent conversion. | ||
1521 | */ | ||
1522 | iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT); | ||
1523 | if (offset + size > XFS_I(inode)->i_d.di_size) | ||
1524 | ioend->io_isdirect = 1; | ||
1525 | |||
1526 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, | ||
1527 | offset, xfs_get_blocks_direct, | 1509 | offset, xfs_get_blocks_direct, |
1528 | xfs_end_io_direct_write, NULL, | 1510 | xfs_end_io_direct_write, NULL, |
1529 | DIO_ASYNC_EXTEND); | 1511 | DIO_ASYNC_EXTEND); |
1530 | if (ret != -EIOCBQUEUED && iocb->private) | ||
1531 | goto out_destroy_ioend; | ||
1532 | } else { | ||
1533 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, | ||
1534 | offset, xfs_get_blocks_direct, | ||
1535 | NULL, NULL, 0); | ||
1536 | } | 1512 | } |
1537 | 1513 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, | |
1538 | return ret; | 1514 | offset, xfs_get_blocks_direct, |
1539 | 1515 | NULL, NULL, 0); | |
1540 | out_destroy_ioend: | ||
1541 | xfs_destroy_ioend(ioend); | ||
1542 | return ret; | ||
1543 | } | 1516 | } |
1544 | 1517 | ||
1545 | /* | 1518 | /* |
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index f94dd459dff9..ac644e0137a4 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h | |||
@@ -24,14 +24,12 @@ extern mempool_t *xfs_ioend_pool; | |||
24 | * Types of I/O for bmap clustering and I/O completion tracking. | 24 | * Types of I/O for bmap clustering and I/O completion tracking. |
25 | */ | 25 | */ |
26 | enum { | 26 | enum { |
27 | XFS_IO_DIRECT = 0, /* special case for direct I/O ioends */ | ||
28 | XFS_IO_DELALLOC, /* covers delalloc region */ | 27 | XFS_IO_DELALLOC, /* covers delalloc region */ |
29 | XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ | 28 | XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ |
30 | XFS_IO_OVERWRITE, /* covers already allocated extent */ | 29 | XFS_IO_OVERWRITE, /* covers already allocated extent */ |
31 | }; | 30 | }; |
32 | 31 | ||
33 | #define XFS_IO_TYPES \ | 32 | #define XFS_IO_TYPES \ |
34 | { 0, "" }, \ | ||
35 | { XFS_IO_DELALLOC, "delalloc" }, \ | 33 | { XFS_IO_DELALLOC, "delalloc" }, \ |
36 | { XFS_IO_UNWRITTEN, "unwritten" }, \ | 34 | { XFS_IO_UNWRITTEN, "unwritten" }, \ |
37 | { XFS_IO_OVERWRITE, "overwrite" } | 35 | { XFS_IO_OVERWRITE, "overwrite" } |
@@ -45,7 +43,6 @@ typedef struct xfs_ioend { | |||
45 | unsigned int io_type; /* delalloc / unwritten */ | 43 | unsigned int io_type; /* delalloc / unwritten */ |
46 | int io_error; /* I/O error code */ | 44 | int io_error; /* I/O error code */ |
47 | atomic_t io_remaining; /* hold count */ | 45 | atomic_t io_remaining; /* hold count */ |
48 | unsigned int io_isdirect : 1;/* direct I/O */ | ||
49 | struct inode *io_inode; /* file being written to */ | 46 | struct inode *io_inode; /* file being written to */ |
50 | struct buffer_head *io_buffer_head;/* buffer linked list head */ | 47 | struct buffer_head *io_buffer_head;/* buffer linked list head */ |
51 | struct buffer_head *io_buffer_tail;/* buffer linked list tail */ | 48 | struct buffer_head *io_buffer_tail;/* buffer linked list tail */ |
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 2fdb72d2c908..736429a72a12 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h | |||
@@ -26,43 +26,8 @@ struct xfs_ifork; | |||
26 | struct xfs_inode; | 26 | struct xfs_inode; |
27 | struct xfs_mount; | 27 | struct xfs_mount; |
28 | struct xfs_trans; | 28 | struct xfs_trans; |
29 | struct xfs_bmalloca; | ||
29 | 30 | ||
30 | /* | ||
31 | * Argument structure for xfs_bmap_alloc. | ||
32 | */ | ||
33 | struct xfs_bmalloca { | ||
34 | xfs_fsblock_t *firstblock; /* i/o first block allocated */ | ||
35 | struct xfs_bmap_free *flist; /* bmap freelist */ | ||
36 | struct xfs_trans *tp; /* transaction pointer */ | ||
37 | struct xfs_inode *ip; /* incore inode pointer */ | ||
38 | struct xfs_bmbt_irec prev; /* extent before the new one */ | ||
39 | struct xfs_bmbt_irec got; /* extent after, or delayed */ | ||
40 | |||
41 | xfs_fileoff_t offset; /* offset in file filling in */ | ||
42 | xfs_extlen_t length; /* i/o length asked/allocated */ | ||
43 | xfs_fsblock_t blkno; /* starting block of new extent */ | ||
44 | |||
45 | struct xfs_btree_cur *cur; /* btree cursor */ | ||
46 | xfs_extnum_t idx; /* current extent index */ | ||
47 | int nallocs;/* number of extents alloc'd */ | ||
48 | int logflags;/* flags for transaction logging */ | ||
49 | |||
50 | xfs_extlen_t total; /* total blocks needed for xaction */ | ||
51 | xfs_extlen_t minlen; /* minimum allocation size (blocks) */ | ||
52 | xfs_extlen_t minleft; /* amount must be left after alloc */ | ||
53 | bool eof; /* set if allocating past last extent */ | ||
54 | bool wasdel; /* replacing a delayed allocation */ | ||
55 | bool userdata;/* set if is user data */ | ||
56 | bool aeof; /* allocated space at eof */ | ||
57 | bool conv; /* overwriting unwritten extents */ | ||
58 | int flags; | ||
59 | struct completion *done; | ||
60 | struct work_struct work; | ||
61 | int result; | ||
62 | }; | ||
63 | |||
64 | int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, | ||
65 | int *committed); | ||
66 | int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); | 31 | int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); |
67 | int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, | 32 | int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, |
68 | int whichfork, int *eof); | 33 | int whichfork, int *eof); |
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index bb502a391792..1790b00bea7a 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -1488,6 +1488,7 @@ xfs_buf_iomove( | |||
1488 | static enum lru_status | 1488 | static enum lru_status |
1489 | xfs_buftarg_wait_rele( | 1489 | xfs_buftarg_wait_rele( |
1490 | struct list_head *item, | 1490 | struct list_head *item, |
1491 | struct list_lru_one *lru, | ||
1491 | spinlock_t *lru_lock, | 1492 | spinlock_t *lru_lock, |
1492 | void *arg) | 1493 | void *arg) |
1493 | 1494 | ||
@@ -1509,7 +1510,7 @@ xfs_buftarg_wait_rele( | |||
1509 | */ | 1510 | */ |
1510 | atomic_set(&bp->b_lru_ref, 0); | 1511 | atomic_set(&bp->b_lru_ref, 0); |
1511 | bp->b_state |= XFS_BSTATE_DISPOSE; | 1512 | bp->b_state |= XFS_BSTATE_DISPOSE; |
1512 | list_move(item, dispose); | 1513 | list_lru_isolate_move(lru, item, dispose); |
1513 | spin_unlock(&bp->b_lock); | 1514 | spin_unlock(&bp->b_lock); |
1514 | return LRU_REMOVED; | 1515 | return LRU_REMOVED; |
1515 | } | 1516 | } |
@@ -1546,6 +1547,7 @@ xfs_wait_buftarg( | |||
1546 | static enum lru_status | 1547 | static enum lru_status |
1547 | xfs_buftarg_isolate( | 1548 | xfs_buftarg_isolate( |
1548 | struct list_head *item, | 1549 | struct list_head *item, |
1550 | struct list_lru_one *lru, | ||
1549 | spinlock_t *lru_lock, | 1551 | spinlock_t *lru_lock, |
1550 | void *arg) | 1552 | void *arg) |
1551 | { | 1553 | { |
@@ -1569,7 +1571,7 @@ xfs_buftarg_isolate( | |||
1569 | } | 1571 | } |
1570 | 1572 | ||
1571 | bp->b_state |= XFS_BSTATE_DISPOSE; | 1573 | bp->b_state |= XFS_BSTATE_DISPOSE; |
1572 | list_move(item, dispose); | 1574 | list_lru_isolate_move(lru, item, dispose); |
1573 | spin_unlock(&bp->b_lock); | 1575 | spin_unlock(&bp->b_lock); |
1574 | return LRU_REMOVED; | 1576 | return LRU_REMOVED; |
1575 | } | 1577 | } |
@@ -1583,10 +1585,9 @@ xfs_buftarg_shrink_scan( | |||
1583 | struct xfs_buftarg, bt_shrinker); | 1585 | struct xfs_buftarg, bt_shrinker); |
1584 | LIST_HEAD(dispose); | 1586 | LIST_HEAD(dispose); |
1585 | unsigned long freed; | 1587 | unsigned long freed; |
1586 | unsigned long nr_to_scan = sc->nr_to_scan; | ||
1587 | 1588 | ||
1588 | freed = list_lru_walk_node(&btp->bt_lru, sc->nid, xfs_buftarg_isolate, | 1589 | freed = list_lru_shrink_walk(&btp->bt_lru, sc, |
1589 | &dispose, &nr_to_scan); | 1590 | xfs_buftarg_isolate, &dispose); |
1590 | 1591 | ||
1591 | while (!list_empty(&dispose)) { | 1592 | while (!list_empty(&dispose)) { |
1592 | struct xfs_buf *bp; | 1593 | struct xfs_buf *bp; |
@@ -1605,7 +1606,7 @@ xfs_buftarg_shrink_count( | |||
1605 | { | 1606 | { |
1606 | struct xfs_buftarg *btp = container_of(shrink, | 1607 | struct xfs_buftarg *btp = container_of(shrink, |
1607 | struct xfs_buftarg, bt_shrinker); | 1608 | struct xfs_buftarg, bt_shrinker); |
1608 | return list_lru_count_node(&btp->bt_lru, sc->nid); | 1609 | return list_lru_shrink_count(&btp->bt_lru, sc); |
1609 | } | 1610 | } |
1610 | 1611 | ||
1611 | void | 1612 | void |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 3f9bd58edec7..507d96a57ac7 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -319,6 +319,10 @@ xfs_buf_item_format( | |||
319 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 319 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
320 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || | 320 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || |
321 | (bip->bli_flags & XFS_BLI_STALE)); | 321 | (bip->bli_flags & XFS_BLI_STALE)); |
322 | ASSERT((bip->bli_flags & XFS_BLI_STALE) || | ||
323 | (xfs_blft_from_flags(&bip->__bli_format) > XFS_BLFT_UNKNOWN_BUF | ||
324 | && xfs_blft_from_flags(&bip->__bli_format) < XFS_BLFT_MAX_BUF)); | ||
325 | |||
322 | 326 | ||
323 | /* | 327 | /* |
324 | * If it is an inode buffer, transfer the in-memory state to the | 328 | * If it is an inode buffer, transfer the in-memory state to the |
@@ -535,7 +539,7 @@ xfs_buf_item_push( | |||
535 | if ((bp->b_flags & XBF_WRITE_FAIL) && | 539 | if ((bp->b_flags & XBF_WRITE_FAIL) && |
536 | ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { | 540 | ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { |
537 | xfs_warn(bp->b_target->bt_mount, | 541 | xfs_warn(bp->b_target->bt_mount, |
538 | "Detected failing async write on buffer block 0x%llx. Retrying async write.\n", | 542 | "Detected failing async write on buffer block 0x%llx. Retrying async write.", |
539 | (long long)bp->b_bn); | 543 | (long long)bp->b_bn); |
540 | } | 544 | } |
541 | 545 | ||
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index c24c67e22a2a..2f536f33cd26 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h | |||
@@ -86,7 +86,7 @@ static inline void xfs_dqflock(xfs_dquot_t *dqp) | |||
86 | wait_for_completion(&dqp->q_flush); | 86 | wait_for_completion(&dqp->q_flush); |
87 | } | 87 | } |
88 | 88 | ||
89 | static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp) | 89 | static inline bool xfs_dqflock_nowait(xfs_dquot_t *dqp) |
90 | { | 90 | { |
91 | return try_wait_for_completion(&dqp->q_flush); | 91 | return try_wait_for_completion(&dqp->q_flush); |
92 | } | 92 | } |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 13e974e6a889..1cdba95c78cb 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -127,6 +127,42 @@ xfs_iozero( | |||
127 | return (-status); | 127 | return (-status); |
128 | } | 128 | } |
129 | 129 | ||
130 | int | ||
131 | xfs_update_prealloc_flags( | ||
132 | struct xfs_inode *ip, | ||
133 | enum xfs_prealloc_flags flags) | ||
134 | { | ||
135 | struct xfs_trans *tp; | ||
136 | int error; | ||
137 | |||
138 | tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); | ||
139 | error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); | ||
140 | if (error) { | ||
141 | xfs_trans_cancel(tp, 0); | ||
142 | return error; | ||
143 | } | ||
144 | |||
145 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
146 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | ||
147 | |||
148 | if (!(flags & XFS_PREALLOC_INVISIBLE)) { | ||
149 | ip->i_d.di_mode &= ~S_ISUID; | ||
150 | if (ip->i_d.di_mode & S_IXGRP) | ||
151 | ip->i_d.di_mode &= ~S_ISGID; | ||
152 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
153 | } | ||
154 | |||
155 | if (flags & XFS_PREALLOC_SET) | ||
156 | ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; | ||
157 | if (flags & XFS_PREALLOC_CLEAR) | ||
158 | ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; | ||
159 | |||
160 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
161 | if (flags & XFS_PREALLOC_SYNC) | ||
162 | xfs_trans_set_sync(tp); | ||
163 | return xfs_trans_commit(tp, 0); | ||
164 | } | ||
165 | |||
130 | /* | 166 | /* |
131 | * Fsync operations on directories are much simpler than on regular files, | 167 | * Fsync operations on directories are much simpler than on regular files, |
132 | * as there is no file data to flush, and thus also no need for explicit | 168 | * as there is no file data to flush, and thus also no need for explicit |
@@ -699,7 +735,7 @@ xfs_file_buffered_aio_write( | |||
699 | 735 | ||
700 | iov_iter_truncate(from, count); | 736 | iov_iter_truncate(from, count); |
701 | /* We can write back this queue in page reclaim */ | 737 | /* We can write back this queue in page reclaim */ |
702 | current->backing_dev_info = mapping->backing_dev_info; | 738 | current->backing_dev_info = inode_to_bdi(inode); |
703 | 739 | ||
704 | write_retry: | 740 | write_retry: |
705 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); | 741 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); |
@@ -784,8 +820,8 @@ xfs_file_fallocate( | |||
784 | { | 820 | { |
785 | struct inode *inode = file_inode(file); | 821 | struct inode *inode = file_inode(file); |
786 | struct xfs_inode *ip = XFS_I(inode); | 822 | struct xfs_inode *ip = XFS_I(inode); |
787 | struct xfs_trans *tp; | ||
788 | long error; | 823 | long error; |
824 | enum xfs_prealloc_flags flags = 0; | ||
789 | loff_t new_size = 0; | 825 | loff_t new_size = 0; |
790 | 826 | ||
791 | if (!S_ISREG(inode->i_mode)) | 827 | if (!S_ISREG(inode->i_mode)) |
@@ -822,6 +858,8 @@ xfs_file_fallocate( | |||
822 | if (error) | 858 | if (error) |
823 | goto out_unlock; | 859 | goto out_unlock; |
824 | } else { | 860 | } else { |
861 | flags |= XFS_PREALLOC_SET; | ||
862 | |||
825 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 863 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
826 | offset + len > i_size_read(inode)) { | 864 | offset + len > i_size_read(inode)) { |
827 | new_size = offset + len; | 865 | new_size = offset + len; |
@@ -839,28 +877,10 @@ xfs_file_fallocate( | |||
839 | goto out_unlock; | 877 | goto out_unlock; |
840 | } | 878 | } |
841 | 879 | ||
842 | tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); | ||
843 | error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); | ||
844 | if (error) { | ||
845 | xfs_trans_cancel(tp, 0); | ||
846 | goto out_unlock; | ||
847 | } | ||
848 | |||
849 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
850 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | ||
851 | ip->i_d.di_mode &= ~S_ISUID; | ||
852 | if (ip->i_d.di_mode & S_IXGRP) | ||
853 | ip->i_d.di_mode &= ~S_ISGID; | ||
854 | |||
855 | if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE))) | ||
856 | ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; | ||
857 | |||
858 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
859 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
860 | |||
861 | if (file->f_flags & O_DSYNC) | 880 | if (file->f_flags & O_DSYNC) |
862 | xfs_trans_set_sync(tp); | 881 | flags |= XFS_PREALLOC_SYNC; |
863 | error = xfs_trans_commit(tp, 0); | 882 | |
883 | error = xfs_update_prealloc_flags(ip, flags); | ||
864 | if (error) | 884 | if (error) |
865 | goto out_unlock; | 885 | goto out_unlock; |
866 | 886 | ||
@@ -1384,5 +1404,4 @@ static const struct vm_operations_struct xfs_file_vm_ops = { | |||
1384 | .fault = filemap_fault, | 1404 | .fault = filemap_fault, |
1385 | .map_pages = filemap_map_pages, | 1405 | .map_pages = filemap_map_pages, |
1386 | .page_mkwrite = xfs_vm_page_mkwrite, | 1406 | .page_mkwrite = xfs_vm_page_mkwrite, |
1387 | .remap_pages = generic_file_remap_pages, | ||
1388 | }; | 1407 | }; |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index fdc64220fcb0..fba6532efba4 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -488,6 +488,7 @@ xfs_growfs_data_private( | |||
488 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree); | 488 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree); |
489 | if (dpct) | 489 | if (dpct) |
490 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); | 490 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); |
491 | xfs_trans_set_sync(tp); | ||
491 | error = xfs_trans_commit(tp, 0); | 492 | error = xfs_trans_commit(tp, 0); |
492 | if (error) | 493 | if (error) |
493 | return error; | 494 | return error; |
@@ -541,7 +542,7 @@ xfs_growfs_data_private( | |||
541 | saved_error = error; | 542 | saved_error = error; |
542 | continue; | 543 | continue; |
543 | } | 544 | } |
544 | xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, XFS_SB_ALL_BITS); | 545 | xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); |
545 | 546 | ||
546 | error = xfs_bwrite(bp); | 547 | error = xfs_bwrite(bp); |
547 | xfs_buf_relse(bp); | 548 | xfs_buf_relse(bp); |
@@ -756,37 +757,6 @@ out: | |||
756 | return 0; | 757 | return 0; |
757 | } | 758 | } |
758 | 759 | ||
759 | /* | ||
760 | * Dump a transaction into the log that contains no real change. This is needed | ||
761 | * to be able to make the log dirty or stamp the current tail LSN into the log | ||
762 | * during the covering operation. | ||
763 | * | ||
764 | * We cannot use an inode here for this - that will push dirty state back up | ||
765 | * into the VFS and then periodic inode flushing will prevent log covering from | ||
766 | * making progress. Hence we log a field in the superblock instead and use a | ||
767 | * synchronous transaction to ensure the superblock is immediately unpinned | ||
768 | * and can be written back. | ||
769 | */ | ||
770 | int | ||
771 | xfs_fs_log_dummy( | ||
772 | xfs_mount_t *mp) | ||
773 | { | ||
774 | xfs_trans_t *tp; | ||
775 | int error; | ||
776 | |||
777 | tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); | ||
778 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); | ||
779 | if (error) { | ||
780 | xfs_trans_cancel(tp, 0); | ||
781 | return error; | ||
782 | } | ||
783 | |||
784 | /* log the UUID because it is an unchanging field */ | ||
785 | xfs_mod_sb(tp, XFS_SB_UUID); | ||
786 | xfs_trans_set_sync(tp); | ||
787 | return xfs_trans_commit(tp, 0); | ||
788 | } | ||
789 | |||
790 | int | 760 | int |
791 | xfs_fs_goingdown( | 761 | xfs_fs_goingdown( |
792 | xfs_mount_t *mp, | 762 | xfs_mount_t *mp, |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 41f804e740d7..daafa1f6d260 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -1995,6 +1995,7 @@ xfs_iunlink( | |||
1995 | agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); | 1995 | agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); |
1996 | offset = offsetof(xfs_agi_t, agi_unlinked) + | 1996 | offset = offsetof(xfs_agi_t, agi_unlinked) + |
1997 | (sizeof(xfs_agino_t) * bucket_index); | 1997 | (sizeof(xfs_agino_t) * bucket_index); |
1998 | xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); | ||
1998 | xfs_trans_log_buf(tp, agibp, offset, | 1999 | xfs_trans_log_buf(tp, agibp, offset, |
1999 | (offset + sizeof(xfs_agino_t) - 1)); | 2000 | (offset + sizeof(xfs_agino_t) - 1)); |
2000 | return 0; | 2001 | return 0; |
@@ -2086,6 +2087,7 @@ xfs_iunlink_remove( | |||
2086 | agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); | 2087 | agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); |
2087 | offset = offsetof(xfs_agi_t, agi_unlinked) + | 2088 | offset = offsetof(xfs_agi_t, agi_unlinked) + |
2088 | (sizeof(xfs_agino_t) * bucket_index); | 2089 | (sizeof(xfs_agino_t) * bucket_index); |
2090 | xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); | ||
2089 | xfs_trans_log_buf(tp, agibp, offset, | 2091 | xfs_trans_log_buf(tp, agibp, offset, |
2090 | (offset + sizeof(xfs_agino_t) - 1)); | 2092 | (offset + sizeof(xfs_agino_t) - 1)); |
2091 | } else { | 2093 | } else { |
@@ -2656,6 +2658,124 @@ xfs_sort_for_rename( | |||
2656 | } | 2658 | } |
2657 | 2659 | ||
2658 | /* | 2660 | /* |
2661 | * xfs_cross_rename() | ||
2662 | * | ||
2663 | * responsible for handling RENAME_EXCHANGE flag in renameat2() sytemcall | ||
2664 | */ | ||
2665 | STATIC int | ||
2666 | xfs_cross_rename( | ||
2667 | struct xfs_trans *tp, | ||
2668 | struct xfs_inode *dp1, | ||
2669 | struct xfs_name *name1, | ||
2670 | struct xfs_inode *ip1, | ||
2671 | struct xfs_inode *dp2, | ||
2672 | struct xfs_name *name2, | ||
2673 | struct xfs_inode *ip2, | ||
2674 | struct xfs_bmap_free *free_list, | ||
2675 | xfs_fsblock_t *first_block, | ||
2676 | int spaceres) | ||
2677 | { | ||
2678 | int error = 0; | ||
2679 | int ip1_flags = 0; | ||
2680 | int ip2_flags = 0; | ||
2681 | int dp2_flags = 0; | ||
2682 | |||
2683 | /* Swap inode number for dirent in first parent */ | ||
2684 | error = xfs_dir_replace(tp, dp1, name1, | ||
2685 | ip2->i_ino, | ||
2686 | first_block, free_list, spaceres); | ||
2687 | if (error) | ||
2688 | goto out; | ||
2689 | |||
2690 | /* Swap inode number for dirent in second parent */ | ||
2691 | error = xfs_dir_replace(tp, dp2, name2, | ||
2692 | ip1->i_ino, | ||
2693 | first_block, free_list, spaceres); | ||
2694 | if (error) | ||
2695 | goto out; | ||
2696 | |||
2697 | /* | ||
2698 | * If we're renaming one or more directories across different parents, | ||
2699 | * update the respective ".." entries (and link counts) to match the new | ||
2700 | * parents. | ||
2701 | */ | ||
2702 | if (dp1 != dp2) { | ||
2703 | dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; | ||
2704 | |||
2705 | if (S_ISDIR(ip2->i_d.di_mode)) { | ||
2706 | error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot, | ||
2707 | dp1->i_ino, first_block, | ||
2708 | free_list, spaceres); | ||
2709 | if (error) | ||
2710 | goto out; | ||
2711 | |||
2712 | /* transfer ip2 ".." reference to dp1 */ | ||
2713 | if (!S_ISDIR(ip1->i_d.di_mode)) { | ||
2714 | error = xfs_droplink(tp, dp2); | ||
2715 | if (error) | ||
2716 | goto out; | ||
2717 | error = xfs_bumplink(tp, dp1); | ||
2718 | if (error) | ||
2719 | goto out; | ||
2720 | } | ||
2721 | |||
2722 | /* | ||
2723 | * Although ip1 isn't changed here, userspace needs | ||
2724 | * to be warned about the change, so that applications | ||
2725 | * relying on it (like backup ones), will properly | ||
2726 | * notify the change | ||
2727 | */ | ||
2728 | ip1_flags |= XFS_ICHGTIME_CHG; | ||
2729 | ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; | ||
2730 | } | ||
2731 | |||
2732 | if (S_ISDIR(ip1->i_d.di_mode)) { | ||
2733 | error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot, | ||
2734 | dp2->i_ino, first_block, | ||
2735 | free_list, spaceres); | ||
2736 | if (error) | ||
2737 | goto out; | ||
2738 | |||
2739 | /* transfer ip1 ".." reference to dp2 */ | ||
2740 | if (!S_ISDIR(ip2->i_d.di_mode)) { | ||
2741 | error = xfs_droplink(tp, dp1); | ||
2742 | if (error) | ||
2743 | goto out; | ||
2744 | error = xfs_bumplink(tp, dp2); | ||
2745 | if (error) | ||
2746 | goto out; | ||
2747 | } | ||
2748 | |||
2749 | /* | ||
2750 | * Although ip2 isn't changed here, userspace needs | ||
2751 | * to be warned about the change, so that applications | ||
2752 | * relying on it (like backup ones), will properly | ||
2753 | * notify the change | ||
2754 | */ | ||
2755 | ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; | ||
2756 | ip2_flags |= XFS_ICHGTIME_CHG; | ||
2757 | } | ||
2758 | } | ||
2759 | |||
2760 | if (ip1_flags) { | ||
2761 | xfs_trans_ichgtime(tp, ip1, ip1_flags); | ||
2762 | xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE); | ||
2763 | } | ||
2764 | if (ip2_flags) { | ||
2765 | xfs_trans_ichgtime(tp, ip2, ip2_flags); | ||
2766 | xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE); | ||
2767 | } | ||
2768 | if (dp2_flags) { | ||
2769 | xfs_trans_ichgtime(tp, dp2, dp2_flags); | ||
2770 | xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE); | ||
2771 | } | ||
2772 | xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
2773 | xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); | ||
2774 | out: | ||
2775 | return error; | ||
2776 | } | ||
2777 | |||
2778 | /* | ||
2659 | * xfs_rename | 2779 | * xfs_rename |
2660 | */ | 2780 | */ |
2661 | int | 2781 | int |
@@ -2665,7 +2785,8 @@ xfs_rename( | |||
2665 | xfs_inode_t *src_ip, | 2785 | xfs_inode_t *src_ip, |
2666 | xfs_inode_t *target_dp, | 2786 | xfs_inode_t *target_dp, |
2667 | struct xfs_name *target_name, | 2787 | struct xfs_name *target_name, |
2668 | xfs_inode_t *target_ip) | 2788 | xfs_inode_t *target_ip, |
2789 | unsigned int flags) | ||
2669 | { | 2790 | { |
2670 | xfs_trans_t *tp = NULL; | 2791 | xfs_trans_t *tp = NULL; |
2671 | xfs_mount_t *mp = src_dp->i_mount; | 2792 | xfs_mount_t *mp = src_dp->i_mount; |
@@ -2743,6 +2864,18 @@ xfs_rename( | |||
2743 | } | 2864 | } |
2744 | 2865 | ||
2745 | /* | 2866 | /* |
2867 | * Handle RENAME_EXCHANGE flags | ||
2868 | */ | ||
2869 | if (flags & RENAME_EXCHANGE) { | ||
2870 | error = xfs_cross_rename(tp, src_dp, src_name, src_ip, | ||
2871 | target_dp, target_name, target_ip, | ||
2872 | &free_list, &first_block, spaceres); | ||
2873 | if (error) | ||
2874 | goto abort_return; | ||
2875 | goto finish_rename; | ||
2876 | } | ||
2877 | |||
2878 | /* | ||
2746 | * Set up the target. | 2879 | * Set up the target. |
2747 | */ | 2880 | */ |
2748 | if (target_ip == NULL) { | 2881 | if (target_ip == NULL) { |
@@ -2881,6 +3014,7 @@ xfs_rename( | |||
2881 | if (new_parent) | 3014 | if (new_parent) |
2882 | xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); | 3015 | xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); |
2883 | 3016 | ||
3017 | finish_rename: | ||
2884 | /* | 3018 | /* |
2885 | * If this is a synchronous mount, make sure that the | 3019 | * If this is a synchronous mount, make sure that the |
2886 | * rename transaction goes to disk before returning to | 3020 | * rename transaction goes to disk before returning to |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 4ed2ba9342dc..86cd6b39bed7 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -338,7 +338,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, | |||
338 | int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, | 338 | int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, |
339 | struct xfs_inode *src_ip, struct xfs_inode *target_dp, | 339 | struct xfs_inode *src_ip, struct xfs_inode *target_dp, |
340 | struct xfs_name *target_name, | 340 | struct xfs_name *target_name, |
341 | struct xfs_inode *target_ip); | 341 | struct xfs_inode *target_ip, unsigned int flags); |
342 | 342 | ||
343 | void xfs_ilock(xfs_inode_t *, uint); | 343 | void xfs_ilock(xfs_inode_t *, uint); |
344 | int xfs_ilock_nowait(xfs_inode_t *, uint); | 344 | int xfs_ilock_nowait(xfs_inode_t *, uint); |
@@ -377,6 +377,15 @@ int xfs_droplink(struct xfs_trans *, struct xfs_inode *); | |||
377 | int xfs_bumplink(struct xfs_trans *, struct xfs_inode *); | 377 | int xfs_bumplink(struct xfs_trans *, struct xfs_inode *); |
378 | 378 | ||
379 | /* from xfs_file.c */ | 379 | /* from xfs_file.c */ |
380 | enum xfs_prealloc_flags { | ||
381 | XFS_PREALLOC_SET = (1 << 1), | ||
382 | XFS_PREALLOC_CLEAR = (1 << 2), | ||
383 | XFS_PREALLOC_SYNC = (1 << 3), | ||
384 | XFS_PREALLOC_INVISIBLE = (1 << 4), | ||
385 | }; | ||
386 | |||
387 | int xfs_update_prealloc_flags(struct xfs_inode *, | ||
388 | enum xfs_prealloc_flags); | ||
380 | int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); | 389 | int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); |
381 | int xfs_iozero(struct xfs_inode *, loff_t, size_t); | 390 | int xfs_iozero(struct xfs_inode *, loff_t, size_t); |
382 | 391 | ||
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index a1831980a68e..f7afb86c9148 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -606,11 +606,8 @@ xfs_ioc_space( | |||
606 | unsigned int cmd, | 606 | unsigned int cmd, |
607 | xfs_flock64_t *bf) | 607 | xfs_flock64_t *bf) |
608 | { | 608 | { |
609 | struct xfs_mount *mp = ip->i_mount; | ||
610 | struct xfs_trans *tp; | ||
611 | struct iattr iattr; | 609 | struct iattr iattr; |
612 | bool setprealloc = false; | 610 | enum xfs_prealloc_flags flags = 0; |
613 | bool clrprealloc = false; | ||
614 | int error; | 611 | int error; |
615 | 612 | ||
616 | /* | 613 | /* |
@@ -630,6 +627,11 @@ xfs_ioc_space( | |||
630 | if (!S_ISREG(inode->i_mode)) | 627 | if (!S_ISREG(inode->i_mode)) |
631 | return -EINVAL; | 628 | return -EINVAL; |
632 | 629 | ||
630 | if (filp->f_flags & O_DSYNC) | ||
631 | flags |= XFS_PREALLOC_SYNC; | ||
632 | if (ioflags & XFS_IO_INVIS) | ||
633 | flags |= XFS_PREALLOC_INVISIBLE; | ||
634 | |||
633 | error = mnt_want_write_file(filp); | 635 | error = mnt_want_write_file(filp); |
634 | if (error) | 636 | if (error) |
635 | return error; | 637 | return error; |
@@ -673,25 +675,23 @@ xfs_ioc_space( | |||
673 | } | 675 | } |
674 | 676 | ||
675 | if (bf->l_start < 0 || | 677 | if (bf->l_start < 0 || |
676 | bf->l_start > mp->m_super->s_maxbytes || | 678 | bf->l_start > inode->i_sb->s_maxbytes || |
677 | bf->l_start + bf->l_len < 0 || | 679 | bf->l_start + bf->l_len < 0 || |
678 | bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) { | 680 | bf->l_start + bf->l_len >= inode->i_sb->s_maxbytes) { |
679 | error = -EINVAL; | 681 | error = -EINVAL; |
680 | goto out_unlock; | 682 | goto out_unlock; |
681 | } | 683 | } |
682 | 684 | ||
683 | switch (cmd) { | 685 | switch (cmd) { |
684 | case XFS_IOC_ZERO_RANGE: | 686 | case XFS_IOC_ZERO_RANGE: |
687 | flags |= XFS_PREALLOC_SET; | ||
685 | error = xfs_zero_file_space(ip, bf->l_start, bf->l_len); | 688 | error = xfs_zero_file_space(ip, bf->l_start, bf->l_len); |
686 | if (!error) | ||
687 | setprealloc = true; | ||
688 | break; | 689 | break; |
689 | case XFS_IOC_RESVSP: | 690 | case XFS_IOC_RESVSP: |
690 | case XFS_IOC_RESVSP64: | 691 | case XFS_IOC_RESVSP64: |
692 | flags |= XFS_PREALLOC_SET; | ||
691 | error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len, | 693 | error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len, |
692 | XFS_BMAPI_PREALLOC); | 694 | XFS_BMAPI_PREALLOC); |
693 | if (!error) | ||
694 | setprealloc = true; | ||
695 | break; | 695 | break; |
696 | case XFS_IOC_UNRESVSP: | 696 | case XFS_IOC_UNRESVSP: |
697 | case XFS_IOC_UNRESVSP64: | 697 | case XFS_IOC_UNRESVSP64: |
@@ -701,6 +701,7 @@ xfs_ioc_space( | |||
701 | case XFS_IOC_ALLOCSP64: | 701 | case XFS_IOC_ALLOCSP64: |
702 | case XFS_IOC_FREESP: | 702 | case XFS_IOC_FREESP: |
703 | case XFS_IOC_FREESP64: | 703 | case XFS_IOC_FREESP64: |
704 | flags |= XFS_PREALLOC_CLEAR; | ||
704 | if (bf->l_start > XFS_ISIZE(ip)) { | 705 | if (bf->l_start > XFS_ISIZE(ip)) { |
705 | error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), | 706 | error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), |
706 | bf->l_start - XFS_ISIZE(ip), 0); | 707 | bf->l_start - XFS_ISIZE(ip), 0); |
@@ -712,8 +713,6 @@ xfs_ioc_space( | |||
712 | iattr.ia_size = bf->l_start; | 713 | iattr.ia_size = bf->l_start; |
713 | 714 | ||
714 | error = xfs_setattr_size(ip, &iattr); | 715 | error = xfs_setattr_size(ip, &iattr); |
715 | if (!error) | ||
716 | clrprealloc = true; | ||
717 | break; | 716 | break; |
718 | default: | 717 | default: |
719 | ASSERT(0); | 718 | ASSERT(0); |
@@ -723,32 +722,7 @@ xfs_ioc_space( | |||
723 | if (error) | 722 | if (error) |
724 | goto out_unlock; | 723 | goto out_unlock; |
725 | 724 | ||
726 | tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); | 725 | error = xfs_update_prealloc_flags(ip, flags); |
727 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0); | ||
728 | if (error) { | ||
729 | xfs_trans_cancel(tp, 0); | ||
730 | goto out_unlock; | ||
731 | } | ||
732 | |||
733 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
734 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | ||
735 | |||
736 | if (!(ioflags & XFS_IO_INVIS)) { | ||
737 | ip->i_d.di_mode &= ~S_ISUID; | ||
738 | if (ip->i_d.di_mode & S_IXGRP) | ||
739 | ip->i_d.di_mode &= ~S_ISGID; | ||
740 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
741 | } | ||
742 | |||
743 | if (setprealloc) | ||
744 | ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; | ||
745 | else if (clrprealloc) | ||
746 | ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; | ||
747 | |||
748 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
749 | if (filp->f_flags & O_DSYNC) | ||
750 | xfs_trans_set_sync(tp); | ||
751 | error = xfs_trans_commit(tp, 0); | ||
752 | 726 | ||
753 | out_unlock: | 727 | out_unlock: |
754 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 728 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
@@ -1013,20 +987,182 @@ xfs_diflags_to_linux( | |||
1013 | inode->i_flags &= ~S_NOATIME; | 987 | inode->i_flags &= ~S_NOATIME; |
1014 | } | 988 | } |
1015 | 989 | ||
1016 | #define FSX_PROJID 1 | 990 | static int |
1017 | #define FSX_EXTSIZE 2 | 991 | xfs_ioctl_setattr_xflags( |
1018 | #define FSX_XFLAGS 4 | 992 | struct xfs_trans *tp, |
1019 | #define FSX_NONBLOCK 8 | 993 | struct xfs_inode *ip, |
994 | struct fsxattr *fa) | ||
995 | { | ||
996 | struct xfs_mount *mp = ip->i_mount; | ||
997 | |||
998 | /* Can't change realtime flag if any extents are allocated. */ | ||
999 | if ((ip->i_d.di_nextents || ip->i_delayed_blks) && | ||
1000 | XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & XFS_XFLAG_REALTIME)) | ||
1001 | return -EINVAL; | ||
1002 | |||
1003 | /* If realtime flag is set then must have realtime device */ | ||
1004 | if (fa->fsx_xflags & XFS_XFLAG_REALTIME) { | ||
1005 | if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 || | ||
1006 | (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) | ||
1007 | return -EINVAL; | ||
1008 | } | ||
1009 | |||
1010 | /* | ||
1011 | * Can't modify an immutable/append-only file unless | ||
1012 | * we have appropriate permission. | ||
1013 | */ | ||
1014 | if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) || | ||
1015 | (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && | ||
1016 | !capable(CAP_LINUX_IMMUTABLE)) | ||
1017 | return -EPERM; | ||
1018 | |||
1019 | xfs_set_diflags(ip, fa->fsx_xflags); | ||
1020 | xfs_diflags_to_linux(ip); | ||
1021 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); | ||
1022 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
1023 | XFS_STATS_INC(xs_ig_attrchg); | ||
1024 | return 0; | ||
1025 | } | ||
1026 | |||
1027 | /* | ||
1028 | * Set up the transaction structure for the setattr operation, checking that we | ||
1029 | * have permission to do so. On success, return a clean transaction and the | ||
1030 | * inode locked exclusively ready for further operation specific checks. On | ||
1031 | * failure, return an error without modifying or locking the inode. | ||
1032 | */ | ||
1033 | static struct xfs_trans * | ||
1034 | xfs_ioctl_setattr_get_trans( | ||
1035 | struct xfs_inode *ip) | ||
1036 | { | ||
1037 | struct xfs_mount *mp = ip->i_mount; | ||
1038 | struct xfs_trans *tp; | ||
1039 | int error; | ||
1040 | |||
1041 | if (mp->m_flags & XFS_MOUNT_RDONLY) | ||
1042 | return ERR_PTR(-EROFS); | ||
1043 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1044 | return ERR_PTR(-EIO); | ||
1045 | |||
1046 | tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); | ||
1047 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); | ||
1048 | if (error) | ||
1049 | goto out_cancel; | ||
1050 | |||
1051 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
1052 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | ||
1053 | |||
1054 | /* | ||
1055 | * CAP_FOWNER overrides the following restrictions: | ||
1056 | * | ||
1057 | * The user ID of the calling process must be equal to the file owner | ||
1058 | * ID, except in cases where the CAP_FSETID capability is applicable. | ||
1059 | */ | ||
1060 | if (!inode_owner_or_capable(VFS_I(ip))) { | ||
1061 | error = -EPERM; | ||
1062 | goto out_cancel; | ||
1063 | } | ||
1064 | |||
1065 | if (mp->m_flags & XFS_MOUNT_WSYNC) | ||
1066 | xfs_trans_set_sync(tp); | ||
1067 | |||
1068 | return tp; | ||
1069 | |||
1070 | out_cancel: | ||
1071 | xfs_trans_cancel(tp, 0); | ||
1072 | return ERR_PTR(error); | ||
1073 | } | ||
1074 | |||
1075 | /* | ||
1076 | * extent size hint validation is somewhat cumbersome. Rules are: | ||
1077 | * | ||
1078 | * 1. extent size hint is only valid for directories and regular files | ||
1079 | * 2. XFS_XFLAG_EXTSIZE is only valid for regular files | ||
1080 | * 3. XFS_XFLAG_EXTSZINHERIT is only valid for directories. | ||
1081 | * 4. can only be changed on regular files if no extents are allocated | ||
1082 | * 5. can be changed on directories at any time | ||
1083 | * 6. extsize hint of 0 turns off hints, clears inode flags. | ||
1084 | * 7. Extent size must be a multiple of the appropriate block size. | ||
1085 | * 8. for non-realtime files, the extent size hint must be limited | ||
1086 | * to half the AG size to avoid alignment extending the extent beyond the | ||
1087 | * limits of the AG. | ||
1088 | */ | ||
1089 | static int | ||
1090 | xfs_ioctl_setattr_check_extsize( | ||
1091 | struct xfs_inode *ip, | ||
1092 | struct fsxattr *fa) | ||
1093 | { | ||
1094 | struct xfs_mount *mp = ip->i_mount; | ||
1095 | |||
1096 | if ((fa->fsx_xflags & XFS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode)) | ||
1097 | return -EINVAL; | ||
1098 | |||
1099 | if ((fa->fsx_xflags & XFS_XFLAG_EXTSZINHERIT) && | ||
1100 | !S_ISDIR(ip->i_d.di_mode)) | ||
1101 | return -EINVAL; | ||
1102 | |||
1103 | if (S_ISREG(ip->i_d.di_mode) && ip->i_d.di_nextents && | ||
1104 | ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) | ||
1105 | return -EINVAL; | ||
1106 | |||
1107 | if (fa->fsx_extsize != 0) { | ||
1108 | xfs_extlen_t size; | ||
1109 | xfs_fsblock_t extsize_fsb; | ||
1110 | |||
1111 | extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); | ||
1112 | if (extsize_fsb > MAXEXTLEN) | ||
1113 | return -EINVAL; | ||
1114 | |||
1115 | if (XFS_IS_REALTIME_INODE(ip) || | ||
1116 | (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { | ||
1117 | size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; | ||
1118 | } else { | ||
1119 | size = mp->m_sb.sb_blocksize; | ||
1120 | if (extsize_fsb > mp->m_sb.sb_agblocks / 2) | ||
1121 | return -EINVAL; | ||
1122 | } | ||
1123 | |||
1124 | if (fa->fsx_extsize % size) | ||
1125 | return -EINVAL; | ||
1126 | } else | ||
1127 | fa->fsx_xflags &= ~(XFS_XFLAG_EXTSIZE | XFS_XFLAG_EXTSZINHERIT); | ||
1128 | |||
1129 | return 0; | ||
1130 | } | ||
1131 | |||
1132 | static int | ||
1133 | xfs_ioctl_setattr_check_projid( | ||
1134 | struct xfs_inode *ip, | ||
1135 | struct fsxattr *fa) | ||
1136 | { | ||
1137 | /* Disallow 32bit project ids if projid32bit feature is not enabled. */ | ||
1138 | if (fa->fsx_projid > (__uint16_t)-1 && | ||
1139 | !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) | ||
1140 | return -EINVAL; | ||
1141 | |||
1142 | /* | ||
1143 | * Project Quota ID state is only allowed to change from within the init | ||
1144 | * namespace. Enforce that restriction only if we are trying to change | ||
1145 | * the quota ID state. Everything else is allowed in user namespaces. | ||
1146 | */ | ||
1147 | if (current_user_ns() == &init_user_ns) | ||
1148 | return 0; | ||
1149 | |||
1150 | if (xfs_get_projid(ip) != fa->fsx_projid) | ||
1151 | return -EINVAL; | ||
1152 | if ((fa->fsx_xflags & XFS_XFLAG_PROJINHERIT) != | ||
1153 | (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) | ||
1154 | return -EINVAL; | ||
1155 | |||
1156 | return 0; | ||
1157 | } | ||
1020 | 1158 | ||
1021 | STATIC int | 1159 | STATIC int |
1022 | xfs_ioctl_setattr( | 1160 | xfs_ioctl_setattr( |
1023 | xfs_inode_t *ip, | 1161 | xfs_inode_t *ip, |
1024 | struct fsxattr *fa, | 1162 | struct fsxattr *fa) |
1025 | int mask) | ||
1026 | { | 1163 | { |
1027 | struct xfs_mount *mp = ip->i_mount; | 1164 | struct xfs_mount *mp = ip->i_mount; |
1028 | struct xfs_trans *tp; | 1165 | struct xfs_trans *tp; |
1029 | unsigned int lock_flags = 0; | ||
1030 | struct xfs_dquot *udqp = NULL; | 1166 | struct xfs_dquot *udqp = NULL; |
1031 | struct xfs_dquot *pdqp = NULL; | 1167 | struct xfs_dquot *pdqp = NULL; |
1032 | struct xfs_dquot *olddquot = NULL; | 1168 | struct xfs_dquot *olddquot = NULL; |
@@ -1034,17 +1170,9 @@ xfs_ioctl_setattr( | |||
1034 | 1170 | ||
1035 | trace_xfs_ioctl_setattr(ip); | 1171 | trace_xfs_ioctl_setattr(ip); |
1036 | 1172 | ||
1037 | if (mp->m_flags & XFS_MOUNT_RDONLY) | 1173 | code = xfs_ioctl_setattr_check_projid(ip, fa); |
1038 | return -EROFS; | 1174 | if (code) |
1039 | if (XFS_FORCED_SHUTDOWN(mp)) | 1175 | return code; |
1040 | return -EIO; | ||
1041 | |||
1042 | /* | ||
1043 | * Disallow 32bit project ids when projid32bit feature is not enabled. | ||
1044 | */ | ||
1045 | if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && | ||
1046 | !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) | ||
1047 | return -EINVAL; | ||
1048 | 1176 | ||
1049 | /* | 1177 | /* |
1050 | * If disk quotas is on, we make sure that the dquots do exist on disk, | 1178 | * If disk quotas is on, we make sure that the dquots do exist on disk, |
@@ -1054,7 +1182,7 @@ xfs_ioctl_setattr( | |||
1054 | * If the IDs do change before we take the ilock, we're covered | 1182 | * If the IDs do change before we take the ilock, we're covered |
1055 | * because the i_*dquot fields will get updated anyway. | 1183 | * because the i_*dquot fields will get updated anyway. |
1056 | */ | 1184 | */ |
1057 | if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { | 1185 | if (XFS_IS_QUOTA_ON(mp)) { |
1058 | code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, | 1186 | code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, |
1059 | ip->i_d.di_gid, fa->fsx_projid, | 1187 | ip->i_d.di_gid, fa->fsx_projid, |
1060 | XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp); | 1188 | XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp); |
@@ -1062,175 +1190,49 @@ xfs_ioctl_setattr( | |||
1062 | return code; | 1190 | return code; |
1063 | } | 1191 | } |
1064 | 1192 | ||
1065 | /* | 1193 | tp = xfs_ioctl_setattr_get_trans(ip); |
1066 | * For the other attributes, we acquire the inode lock and | 1194 | if (IS_ERR(tp)) { |
1067 | * first do an error checking pass. | 1195 | code = PTR_ERR(tp); |
1068 | */ | 1196 | goto error_free_dquots; |
1069 | tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); | ||
1070 | code = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); | ||
1071 | if (code) | ||
1072 | goto error_return; | ||
1073 | |||
1074 | lock_flags = XFS_ILOCK_EXCL; | ||
1075 | xfs_ilock(ip, lock_flags); | ||
1076 | |||
1077 | /* | ||
1078 | * CAP_FOWNER overrides the following restrictions: | ||
1079 | * | ||
1080 | * The user ID of the calling process must be equal | ||
1081 | * to the file owner ID, except in cases where the | ||
1082 | * CAP_FSETID capability is applicable. | ||
1083 | */ | ||
1084 | if (!inode_owner_or_capable(VFS_I(ip))) { | ||
1085 | code = -EPERM; | ||
1086 | goto error_return; | ||
1087 | } | ||
1088 | |||
1089 | /* | ||
1090 | * Do a quota reservation only if projid is actually going to change. | ||
1091 | * Only allow changing of projid from init_user_ns since it is a | ||
1092 | * non user namespace aware identifier. | ||
1093 | */ | ||
1094 | if (mask & FSX_PROJID) { | ||
1095 | if (current_user_ns() != &init_user_ns) { | ||
1096 | code = -EINVAL; | ||
1097 | goto error_return; | ||
1098 | } | ||
1099 | |||
1100 | if (XFS_IS_QUOTA_RUNNING(mp) && | ||
1101 | XFS_IS_PQUOTA_ON(mp) && | ||
1102 | xfs_get_projid(ip) != fa->fsx_projid) { | ||
1103 | ASSERT(tp); | ||
1104 | code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, | ||
1105 | pdqp, capable(CAP_FOWNER) ? | ||
1106 | XFS_QMOPT_FORCE_RES : 0); | ||
1107 | if (code) /* out of quota */ | ||
1108 | goto error_return; | ||
1109 | } | ||
1110 | } | 1197 | } |
1111 | 1198 | ||
1112 | if (mask & FSX_EXTSIZE) { | ||
1113 | /* | ||
1114 | * Can't change extent size if any extents are allocated. | ||
1115 | */ | ||
1116 | if (ip->i_d.di_nextents && | ||
1117 | ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != | ||
1118 | fa->fsx_extsize)) { | ||
1119 | code = -EINVAL; /* EFBIG? */ | ||
1120 | goto error_return; | ||
1121 | } | ||
1122 | 1199 | ||
1123 | /* | 1200 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) && |
1124 | * Extent size must be a multiple of the appropriate block | 1201 | xfs_get_projid(ip) != fa->fsx_projid) { |
1125 | * size, if set at all. It must also be smaller than the | 1202 | code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp, |
1126 | * maximum extent size supported by the filesystem. | 1203 | capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); |
1127 | * | 1204 | if (code) /* out of quota */ |
1128 | * Also, for non-realtime files, limit the extent size hint to | 1205 | goto error_trans_cancel; |
1129 | * half the size of the AGs in the filesystem so alignment | ||
1130 | * doesn't result in extents larger than an AG. | ||
1131 | */ | ||
1132 | if (fa->fsx_extsize != 0) { | ||
1133 | xfs_extlen_t size; | ||
1134 | xfs_fsblock_t extsize_fsb; | ||
1135 | |||
1136 | extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); | ||
1137 | if (extsize_fsb > MAXEXTLEN) { | ||
1138 | code = -EINVAL; | ||
1139 | goto error_return; | ||
1140 | } | ||
1141 | |||
1142 | if (XFS_IS_REALTIME_INODE(ip) || | ||
1143 | ((mask & FSX_XFLAGS) && | ||
1144 | (fa->fsx_xflags & XFS_XFLAG_REALTIME))) { | ||
1145 | size = mp->m_sb.sb_rextsize << | ||
1146 | mp->m_sb.sb_blocklog; | ||
1147 | } else { | ||
1148 | size = mp->m_sb.sb_blocksize; | ||
1149 | if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { | ||
1150 | code = -EINVAL; | ||
1151 | goto error_return; | ||
1152 | } | ||
1153 | } | ||
1154 | |||
1155 | if (fa->fsx_extsize % size) { | ||
1156 | code = -EINVAL; | ||
1157 | goto error_return; | ||
1158 | } | ||
1159 | } | ||
1160 | } | 1206 | } |
1161 | 1207 | ||
1208 | code = xfs_ioctl_setattr_check_extsize(ip, fa); | ||
1209 | if (code) | ||
1210 | goto error_trans_cancel; | ||
1162 | 1211 | ||
1163 | if (mask & FSX_XFLAGS) { | 1212 | code = xfs_ioctl_setattr_xflags(tp, ip, fa); |
1164 | /* | 1213 | if (code) |
1165 | * Can't change realtime flag if any extents are allocated. | 1214 | goto error_trans_cancel; |
1166 | */ | ||
1167 | if ((ip->i_d.di_nextents || ip->i_delayed_blks) && | ||
1168 | (XFS_IS_REALTIME_INODE(ip)) != | ||
1169 | (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { | ||
1170 | code = -EINVAL; /* EFBIG? */ | ||
1171 | goto error_return; | ||
1172 | } | ||
1173 | |||
1174 | /* | ||
1175 | * If realtime flag is set then must have realtime data. | ||
1176 | */ | ||
1177 | if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) { | ||
1178 | if ((mp->m_sb.sb_rblocks == 0) || | ||
1179 | (mp->m_sb.sb_rextsize == 0) || | ||
1180 | (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { | ||
1181 | code = -EINVAL; | ||
1182 | goto error_return; | ||
1183 | } | ||
1184 | } | ||
1185 | |||
1186 | /* | ||
1187 | * Can't modify an immutable/append-only file unless | ||
1188 | * we have appropriate permission. | ||
1189 | */ | ||
1190 | if ((ip->i_d.di_flags & | ||
1191 | (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || | ||
1192 | (fa->fsx_xflags & | ||
1193 | (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && | ||
1194 | !capable(CAP_LINUX_IMMUTABLE)) { | ||
1195 | code = -EPERM; | ||
1196 | goto error_return; | ||
1197 | } | ||
1198 | } | ||
1199 | |||
1200 | xfs_trans_ijoin(tp, ip, 0); | ||
1201 | 1215 | ||
1202 | /* | 1216 | /* |
1203 | * Change file ownership. Must be the owner or privileged. | 1217 | * Change file ownership. Must be the owner or privileged. CAP_FSETID |
1218 | * overrides the following restrictions: | ||
1219 | * | ||
1220 | * The set-user-ID and set-group-ID bits of a file will be cleared upon | ||
1221 | * successful return from chown() | ||
1204 | */ | 1222 | */ |
1205 | if (mask & FSX_PROJID) { | ||
1206 | /* | ||
1207 | * CAP_FSETID overrides the following restrictions: | ||
1208 | * | ||
1209 | * The set-user-ID and set-group-ID bits of a file will be | ||
1210 | * cleared upon successful return from chown() | ||
1211 | */ | ||
1212 | if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && | ||
1213 | !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID)) | ||
1214 | ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); | ||
1215 | |||
1216 | /* | ||
1217 | * Change the ownerships and register quota modifications | ||
1218 | * in the transaction. | ||
1219 | */ | ||
1220 | if (xfs_get_projid(ip) != fa->fsx_projid) { | ||
1221 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { | ||
1222 | olddquot = xfs_qm_vop_chown(tp, ip, | ||
1223 | &ip->i_pdquot, pdqp); | ||
1224 | } | ||
1225 | ASSERT(ip->i_d.di_version > 1); | ||
1226 | xfs_set_projid(ip, fa->fsx_projid); | ||
1227 | } | ||
1228 | 1223 | ||
1229 | } | 1224 | if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && |
1225 | !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID)) | ||
1226 | ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); | ||
1230 | 1227 | ||
1231 | if (mask & FSX_XFLAGS) { | 1228 | /* Change the ownerships and register project quota modifications */ |
1232 | xfs_set_diflags(ip, fa->fsx_xflags); | 1229 | if (xfs_get_projid(ip) != fa->fsx_projid) { |
1233 | xfs_diflags_to_linux(ip); | 1230 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { |
1231 | olddquot = xfs_qm_vop_chown(tp, ip, | ||
1232 | &ip->i_pdquot, pdqp); | ||
1233 | } | ||
1234 | ASSERT(ip->i_d.di_version > 1); | ||
1235 | xfs_set_projid(ip, fa->fsx_projid); | ||
1234 | } | 1236 | } |
1235 | 1237 | ||
1236 | /* | 1238 | /* |
@@ -1238,34 +1240,12 @@ xfs_ioctl_setattr( | |||
1238 | * extent size hint should be set on the inode. If no extent size flags | 1240 | * extent size hint should be set on the inode. If no extent size flags |
1239 | * are set on the inode then unconditionally clear the extent size hint. | 1241 | * are set on the inode then unconditionally clear the extent size hint. |
1240 | */ | 1242 | */ |
1241 | if (mask & FSX_EXTSIZE) { | 1243 | if (ip->i_d.di_flags & (XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT)) |
1242 | int extsize = 0; | 1244 | ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; |
1243 | 1245 | else | |
1244 | if (ip->i_d.di_flags & | 1246 | ip->i_d.di_extsize = 0; |
1245 | (XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT)) | ||
1246 | extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; | ||
1247 | ip->i_d.di_extsize = extsize; | ||
1248 | } | ||
1249 | |||
1250 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); | ||
1251 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
1252 | |||
1253 | XFS_STATS_INC(xs_ig_attrchg); | ||
1254 | 1247 | ||
1255 | /* | ||
1256 | * If this is a synchronous mount, make sure that the | ||
1257 | * transaction goes to disk before returning to the user. | ||
1258 | * This is slightly sub-optimal in that truncates require | ||
1259 | * two sync transactions instead of one for wsync filesystems. | ||
1260 | * One for the truncate and one for the timestamps since we | ||
1261 | * don't want to change the timestamps unless we're sure the | ||
1262 | * truncate worked. Truncates are less than 1% of the laddis | ||
1263 | * mix so this probably isn't worth the trouble to optimize. | ||
1264 | */ | ||
1265 | if (mp->m_flags & XFS_MOUNT_WSYNC) | ||
1266 | xfs_trans_set_sync(tp); | ||
1267 | code = xfs_trans_commit(tp, 0); | 1248 | code = xfs_trans_commit(tp, 0); |
1268 | xfs_iunlock(ip, lock_flags); | ||
1269 | 1249 | ||
1270 | /* | 1250 | /* |
1271 | * Release any dquot(s) the inode had kept before chown. | 1251 | * Release any dquot(s) the inode had kept before chown. |
@@ -1276,12 +1256,11 @@ xfs_ioctl_setattr( | |||
1276 | 1256 | ||
1277 | return code; | 1257 | return code; |
1278 | 1258 | ||
1279 | error_return: | 1259 | error_trans_cancel: |
1260 | xfs_trans_cancel(tp, 0); | ||
1261 | error_free_dquots: | ||
1280 | xfs_qm_dqrele(udqp); | 1262 | xfs_qm_dqrele(udqp); |
1281 | xfs_qm_dqrele(pdqp); | 1263 | xfs_qm_dqrele(pdqp); |
1282 | xfs_trans_cancel(tp, 0); | ||
1283 | if (lock_flags) | ||
1284 | xfs_iunlock(ip, lock_flags); | ||
1285 | return code; | 1264 | return code; |
1286 | } | 1265 | } |
1287 | 1266 | ||
@@ -1292,20 +1271,15 @@ xfs_ioc_fssetxattr( | |||
1292 | void __user *arg) | 1271 | void __user *arg) |
1293 | { | 1272 | { |
1294 | struct fsxattr fa; | 1273 | struct fsxattr fa; |
1295 | unsigned int mask; | ||
1296 | int error; | 1274 | int error; |
1297 | 1275 | ||
1298 | if (copy_from_user(&fa, arg, sizeof(fa))) | 1276 | if (copy_from_user(&fa, arg, sizeof(fa))) |
1299 | return -EFAULT; | 1277 | return -EFAULT; |
1300 | 1278 | ||
1301 | mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID; | ||
1302 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) | ||
1303 | mask |= FSX_NONBLOCK; | ||
1304 | |||
1305 | error = mnt_want_write_file(filp); | 1279 | error = mnt_want_write_file(filp); |
1306 | if (error) | 1280 | if (error) |
1307 | return error; | 1281 | return error; |
1308 | error = xfs_ioctl_setattr(ip, &fa, mask); | 1282 | error = xfs_ioctl_setattr(ip, &fa); |
1309 | mnt_drop_write_file(filp); | 1283 | mnt_drop_write_file(filp); |
1310 | return error; | 1284 | return error; |
1311 | } | 1285 | } |
@@ -1325,14 +1299,14 @@ xfs_ioc_getxflags( | |||
1325 | 1299 | ||
1326 | STATIC int | 1300 | STATIC int |
1327 | xfs_ioc_setxflags( | 1301 | xfs_ioc_setxflags( |
1328 | xfs_inode_t *ip, | 1302 | struct xfs_inode *ip, |
1329 | struct file *filp, | 1303 | struct file *filp, |
1330 | void __user *arg) | 1304 | void __user *arg) |
1331 | { | 1305 | { |
1306 | struct xfs_trans *tp; | ||
1332 | struct fsxattr fa; | 1307 | struct fsxattr fa; |
1333 | unsigned int flags; | 1308 | unsigned int flags; |
1334 | unsigned int mask; | 1309 | int error; |
1335 | int error; | ||
1336 | 1310 | ||
1337 | if (copy_from_user(&flags, arg, sizeof(flags))) | 1311 | if (copy_from_user(&flags, arg, sizeof(flags))) |
1338 | return -EFAULT; | 1312 | return -EFAULT; |
@@ -1342,15 +1316,26 @@ xfs_ioc_setxflags( | |||
1342 | FS_SYNC_FL)) | 1316 | FS_SYNC_FL)) |
1343 | return -EOPNOTSUPP; | 1317 | return -EOPNOTSUPP; |
1344 | 1318 | ||
1345 | mask = FSX_XFLAGS; | ||
1346 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) | ||
1347 | mask |= FSX_NONBLOCK; | ||
1348 | fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); | 1319 | fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); |
1349 | 1320 | ||
1350 | error = mnt_want_write_file(filp); | 1321 | error = mnt_want_write_file(filp); |
1351 | if (error) | 1322 | if (error) |
1352 | return error; | 1323 | return error; |
1353 | error = xfs_ioctl_setattr(ip, &fa, mask); | 1324 | |
1325 | tp = xfs_ioctl_setattr_get_trans(ip); | ||
1326 | if (IS_ERR(tp)) { | ||
1327 | error = PTR_ERR(tp); | ||
1328 | goto out_drop_write; | ||
1329 | } | ||
1330 | |||
1331 | error = xfs_ioctl_setattr_xflags(tp, ip, &fa); | ||
1332 | if (error) { | ||
1333 | xfs_trans_cancel(tp, 0); | ||
1334 | goto out_drop_write; | ||
1335 | } | ||
1336 | |||
1337 | error = xfs_trans_commit(tp, 0); | ||
1338 | out_drop_write: | ||
1354 | mnt_drop_write_file(filp); | 1339 | mnt_drop_write_file(filp); |
1355 | return error; | 1340 | return error; |
1356 | } | 1341 | } |
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index ec6772866f3d..bfc7c7c8a0c8 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c | |||
@@ -423,7 +423,7 @@ xfs_compat_attrmulti_by_handle( | |||
423 | 423 | ||
424 | ops = memdup_user(compat_ptr(am_hreq.ops), size); | 424 | ops = memdup_user(compat_ptr(am_hreq.ops), size); |
425 | if (IS_ERR(ops)) { | 425 | if (IS_ERR(ops)) { |
426 | error = -PTR_ERR(ops); | 426 | error = PTR_ERR(ops); |
427 | goto out_dput; | 427 | goto out_dput; |
428 | } | 428 | } |
429 | 429 | ||
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index c980e2a5086b..ccb1dd0d509e 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -802,7 +802,7 @@ int | |||
802 | xfs_iomap_write_unwritten( | 802 | xfs_iomap_write_unwritten( |
803 | xfs_inode_t *ip, | 803 | xfs_inode_t *ip, |
804 | xfs_off_t offset, | 804 | xfs_off_t offset, |
805 | size_t count) | 805 | xfs_off_t count) |
806 | { | 806 | { |
807 | xfs_mount_t *mp = ip->i_mount; | 807 | xfs_mount_t *mp = ip->i_mount; |
808 | xfs_fileoff_t offset_fsb; | 808 | xfs_fileoff_t offset_fsb; |
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 411fbb8919ef..8688e663d744 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h | |||
@@ -27,6 +27,6 @@ int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, | |||
27 | struct xfs_bmbt_irec *); | 27 | struct xfs_bmbt_irec *); |
28 | int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, | 28 | int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, |
29 | struct xfs_bmbt_irec *); | 29 | struct xfs_bmbt_irec *); |
30 | int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); | 30 | int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t); |
31 | 31 | ||
32 | #endif /* __XFS_IOMAP_H__*/ | 32 | #endif /* __XFS_IOMAP_H__*/ |
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index c50311cae1b1..ce80eeb8faa4 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -380,18 +380,27 @@ xfs_vn_rename( | |||
380 | struct inode *odir, | 380 | struct inode *odir, |
381 | struct dentry *odentry, | 381 | struct dentry *odentry, |
382 | struct inode *ndir, | 382 | struct inode *ndir, |
383 | struct dentry *ndentry) | 383 | struct dentry *ndentry, |
384 | unsigned int flags) | ||
384 | { | 385 | { |
385 | struct inode *new_inode = ndentry->d_inode; | 386 | struct inode *new_inode = ndentry->d_inode; |
387 | int omode = 0; | ||
386 | struct xfs_name oname; | 388 | struct xfs_name oname; |
387 | struct xfs_name nname; | 389 | struct xfs_name nname; |
388 | 390 | ||
389 | xfs_dentry_to_name(&oname, odentry, 0); | 391 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) |
392 | return -EINVAL; | ||
393 | |||
394 | /* if we are exchanging files, we need to set i_mode of both files */ | ||
395 | if (flags & RENAME_EXCHANGE) | ||
396 | omode = ndentry->d_inode->i_mode; | ||
397 | |||
398 | xfs_dentry_to_name(&oname, odentry, omode); | ||
390 | xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode); | 399 | xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode); |
391 | 400 | ||
392 | return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), | 401 | return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), |
393 | XFS_I(ndir), &nname, new_inode ? | 402 | XFS_I(ndir), &nname, |
394 | XFS_I(new_inode) : NULL); | 403 | new_inode ? XFS_I(new_inode) : NULL, flags); |
395 | } | 404 | } |
396 | 405 | ||
397 | /* | 406 | /* |
@@ -1144,7 +1153,7 @@ static const struct inode_operations xfs_dir_inode_operations = { | |||
1144 | */ | 1153 | */ |
1145 | .rmdir = xfs_vn_unlink, | 1154 | .rmdir = xfs_vn_unlink, |
1146 | .mknod = xfs_vn_mknod, | 1155 | .mknod = xfs_vn_mknod, |
1147 | .rename = xfs_vn_rename, | 1156 | .rename2 = xfs_vn_rename, |
1148 | .get_acl = xfs_get_acl, | 1157 | .get_acl = xfs_get_acl, |
1149 | .set_acl = xfs_set_acl, | 1158 | .set_acl = xfs_set_acl, |
1150 | .getattr = xfs_vn_getattr, | 1159 | .getattr = xfs_vn_getattr, |
@@ -1172,7 +1181,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { | |||
1172 | */ | 1181 | */ |
1173 | .rmdir = xfs_vn_unlink, | 1182 | .rmdir = xfs_vn_unlink, |
1174 | .mknod = xfs_vn_mknod, | 1183 | .mknod = xfs_vn_mknod, |
1175 | .rename = xfs_vn_rename, | 1184 | .rename2 = xfs_vn_rename, |
1176 | .get_acl = xfs_get_acl, | 1185 | .get_acl = xfs_get_acl, |
1177 | .set_acl = xfs_set_acl, | 1186 | .set_acl = xfs_set_acl, |
1178 | .getattr = xfs_vn_getattr, | 1187 | .getattr = xfs_vn_getattr, |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index e408bf5a3ff7..bcc7cfabb787 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include "xfs_fsops.h" | 33 | #include "xfs_fsops.h" |
34 | #include "xfs_cksum.h" | 34 | #include "xfs_cksum.h" |
35 | #include "xfs_sysfs.h" | 35 | #include "xfs_sysfs.h" |
36 | #include "xfs_sb.h" | ||
36 | 37 | ||
37 | kmem_zone_t *xfs_log_ticket_zone; | 38 | kmem_zone_t *xfs_log_ticket_zone; |
38 | 39 | ||
@@ -1290,9 +1291,20 @@ xfs_log_worker( | |||
1290 | struct xfs_mount *mp = log->l_mp; | 1291 | struct xfs_mount *mp = log->l_mp; |
1291 | 1292 | ||
1292 | /* dgc: errors ignored - not fatal and nowhere to report them */ | 1293 | /* dgc: errors ignored - not fatal and nowhere to report them */ |
1293 | if (xfs_log_need_covered(mp)) | 1294 | if (xfs_log_need_covered(mp)) { |
1294 | xfs_fs_log_dummy(mp); | 1295 | /* |
1295 | else | 1296 | * Dump a transaction into the log that contains no real change. |
1297 | * This is needed to stamp the current tail LSN into the log | ||
1298 | * during the covering operation. | ||
1299 | * | ||
1300 | * We cannot use an inode here for this - that will push dirty | ||
1301 | * state back up into the VFS and then periodic inode flushing | ||
1302 | * will prevent log covering from making progress. Hence we | ||
1303 | * synchronously log the superblock instead to ensure the | ||
1304 | * superblock is immediately unpinned and can be written back. | ||
1305 | */ | ||
1306 | xfs_sync_sb(mp, true); | ||
1307 | } else | ||
1296 | xfs_log_force(mp, 0); | 1308 | xfs_log_force(mp, 0); |
1297 | 1309 | ||
1298 | /* start pushing all the metadata that is currently dirty */ | 1310 | /* start pushing all the metadata that is currently dirty */ |
@@ -1395,6 +1407,8 @@ xlog_alloc_log( | |||
1395 | ASSERT(xfs_buf_islocked(bp)); | 1407 | ASSERT(xfs_buf_islocked(bp)); |
1396 | xfs_buf_unlock(bp); | 1408 | xfs_buf_unlock(bp); |
1397 | 1409 | ||
1410 | /* use high priority wq for log I/O completion */ | ||
1411 | bp->b_ioend_wq = mp->m_log_workqueue; | ||
1398 | bp->b_iodone = xlog_iodone; | 1412 | bp->b_iodone = xlog_iodone; |
1399 | log->l_xbuf = bp; | 1413 | log->l_xbuf = bp; |
1400 | 1414 | ||
@@ -1427,6 +1441,8 @@ xlog_alloc_log( | |||
1427 | ASSERT(xfs_buf_islocked(bp)); | 1441 | ASSERT(xfs_buf_islocked(bp)); |
1428 | xfs_buf_unlock(bp); | 1442 | xfs_buf_unlock(bp); |
1429 | 1443 | ||
1444 | /* use high priority wq for log I/O completion */ | ||
1445 | bp->b_ioend_wq = mp->m_log_workqueue; | ||
1430 | bp->b_iodone = xlog_iodone; | 1446 | bp->b_iodone = xlog_iodone; |
1431 | iclog->ic_bp = bp; | 1447 | iclog->ic_bp = bp; |
1432 | iclog->ic_data = bp->b_addr; | 1448 | iclog->ic_data = bp->b_addr; |
@@ -1806,8 +1822,6 @@ xlog_sync( | |||
1806 | XFS_BUF_ZEROFLAGS(bp); | 1822 | XFS_BUF_ZEROFLAGS(bp); |
1807 | XFS_BUF_ASYNC(bp); | 1823 | XFS_BUF_ASYNC(bp); |
1808 | bp->b_flags |= XBF_SYNCIO; | 1824 | bp->b_flags |= XBF_SYNCIO; |
1809 | /* use high priority completion wq */ | ||
1810 | bp->b_ioend_wq = log->l_mp->m_log_workqueue; | ||
1811 | 1825 | ||
1812 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { | 1826 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { |
1813 | bp->b_flags |= XBF_FUA; | 1827 | bp->b_flags |= XBF_FUA; |
@@ -1856,8 +1870,6 @@ xlog_sync( | |||
1856 | bp->b_flags |= XBF_SYNCIO; | 1870 | bp->b_flags |= XBF_SYNCIO; |
1857 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) | 1871 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) |
1858 | bp->b_flags |= XBF_FUA; | 1872 | bp->b_flags |= XBF_FUA; |
1859 | /* use high priority completion wq */ | ||
1860 | bp->b_ioend_wq = log->l_mp->m_log_workqueue; | ||
1861 | 1873 | ||
1862 | ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); | 1874 | ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); |
1863 | ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); | 1875 | ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); |
@@ -2027,7 +2039,7 @@ xlog_print_tic_res( | |||
2027 | " total reg = %u bytes (o/flow = %u bytes)\n" | 2039 | " total reg = %u bytes (o/flow = %u bytes)\n" |
2028 | " ophdrs = %u (ophdr space = %u bytes)\n" | 2040 | " ophdrs = %u (ophdr space = %u bytes)\n" |
2029 | " ophdr + reg = %u bytes\n" | 2041 | " ophdr + reg = %u bytes\n" |
2030 | " num regions = %u\n", | 2042 | " num regions = %u", |
2031 | ((ticket->t_trans_type <= 0 || | 2043 | ((ticket->t_trans_type <= 0 || |
2032 | ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? | 2044 | ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? |
2033 | "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), | 2045 | "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index d3d38836f87f..4fa80e63eea2 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -408,11 +408,11 @@ xfs_update_alignment(xfs_mount_t *mp) | |||
408 | if (xfs_sb_version_hasdalign(sbp)) { | 408 | if (xfs_sb_version_hasdalign(sbp)) { |
409 | if (sbp->sb_unit != mp->m_dalign) { | 409 | if (sbp->sb_unit != mp->m_dalign) { |
410 | sbp->sb_unit = mp->m_dalign; | 410 | sbp->sb_unit = mp->m_dalign; |
411 | mp->m_update_flags |= XFS_SB_UNIT; | 411 | mp->m_update_sb = true; |
412 | } | 412 | } |
413 | if (sbp->sb_width != mp->m_swidth) { | 413 | if (sbp->sb_width != mp->m_swidth) { |
414 | sbp->sb_width = mp->m_swidth; | 414 | sbp->sb_width = mp->m_swidth; |
415 | mp->m_update_flags |= XFS_SB_WIDTH; | 415 | mp->m_update_sb = true; |
416 | } | 416 | } |
417 | } else { | 417 | } else { |
418 | xfs_warn(mp, | 418 | xfs_warn(mp, |
@@ -583,38 +583,19 @@ int | |||
583 | xfs_mount_reset_sbqflags( | 583 | xfs_mount_reset_sbqflags( |
584 | struct xfs_mount *mp) | 584 | struct xfs_mount *mp) |
585 | { | 585 | { |
586 | int error; | ||
587 | struct xfs_trans *tp; | ||
588 | |||
589 | mp->m_qflags = 0; | 586 | mp->m_qflags = 0; |
590 | 587 | ||
591 | /* | 588 | /* It is OK to look at sb_qflags in the mount path without m_sb_lock. */ |
592 | * It is OK to look at sb_qflags here in mount path, | ||
593 | * without m_sb_lock. | ||
594 | */ | ||
595 | if (mp->m_sb.sb_qflags == 0) | 589 | if (mp->m_sb.sb_qflags == 0) |
596 | return 0; | 590 | return 0; |
597 | spin_lock(&mp->m_sb_lock); | 591 | spin_lock(&mp->m_sb_lock); |
598 | mp->m_sb.sb_qflags = 0; | 592 | mp->m_sb.sb_qflags = 0; |
599 | spin_unlock(&mp->m_sb_lock); | 593 | spin_unlock(&mp->m_sb_lock); |
600 | 594 | ||
601 | /* | 595 | if (!xfs_fs_writable(mp, SB_FREEZE_WRITE)) |
602 | * If the fs is readonly, let the incore superblock run | ||
603 | * with quotas off but don't flush the update out to disk | ||
604 | */ | ||
605 | if (mp->m_flags & XFS_MOUNT_RDONLY) | ||
606 | return 0; | 596 | return 0; |
607 | 597 | ||
608 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); | 598 | return xfs_sync_sb(mp, false); |
609 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0); | ||
610 | if (error) { | ||
611 | xfs_trans_cancel(tp, 0); | ||
612 | xfs_alert(mp, "%s: Superblock update failed!", __func__); | ||
613 | return error; | ||
614 | } | ||
615 | |||
616 | xfs_mod_sb(tp, XFS_SB_QFLAGS); | ||
617 | return xfs_trans_commit(tp, 0); | ||
618 | } | 599 | } |
619 | 600 | ||
620 | __uint64_t | 601 | __uint64_t |
@@ -659,26 +640,25 @@ xfs_mountfs( | |||
659 | xfs_sb_mount_common(mp, sbp); | 640 | xfs_sb_mount_common(mp, sbp); |
660 | 641 | ||
661 | /* | 642 | /* |
662 | * Check for a mismatched features2 values. Older kernels | 643 | * Check for a mismatched features2 values. Older kernels read & wrote |
663 | * read & wrote into the wrong sb offset for sb_features2 | 644 | * into the wrong sb offset for sb_features2 on some platforms due to |
664 | * on some platforms due to xfs_sb_t not being 64bit size aligned | 645 | * xfs_sb_t not being 64bit size aligned when sb_features2 was added, |
665 | * when sb_features2 was added, which made older superblock | 646 | * which made older superblock reading/writing routines swap it as a |
666 | * reading/writing routines swap it as a 64-bit value. | 647 | * 64-bit value. |
667 | * | 648 | * |
668 | * For backwards compatibility, we make both slots equal. | 649 | * For backwards compatibility, we make both slots equal. |
669 | * | 650 | * |
670 | * If we detect a mismatched field, we OR the set bits into the | 651 | * If we detect a mismatched field, we OR the set bits into the existing |
671 | * existing features2 field in case it has already been modified; we | 652 | * features2 field in case it has already been modified; we don't want |
672 | * don't want to lose any features. We then update the bad location | 653 | * to lose any features. We then update the bad location with the ORed |
673 | * with the ORed value so that older kernels will see any features2 | 654 | * value so that older kernels will see any features2 flags. The |
674 | * flags, and mark the two fields as needing updates once the | 655 | * superblock writeback code ensures the new sb_features2 is copied to |
675 | * transaction subsystem is online. | 656 | * sb_bad_features2 before it is logged or written to disk. |
676 | */ | 657 | */ |
677 | if (xfs_sb_has_mismatched_features2(sbp)) { | 658 | if (xfs_sb_has_mismatched_features2(sbp)) { |
678 | xfs_warn(mp, "correcting sb_features alignment problem"); | 659 | xfs_warn(mp, "correcting sb_features alignment problem"); |
679 | sbp->sb_features2 |= sbp->sb_bad_features2; | 660 | sbp->sb_features2 |= sbp->sb_bad_features2; |
680 | sbp->sb_bad_features2 = sbp->sb_features2; | 661 | mp->m_update_sb = true; |
681 | mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2; | ||
682 | 662 | ||
683 | /* | 663 | /* |
684 | * Re-check for ATTR2 in case it was found in bad_features2 | 664 | * Re-check for ATTR2 in case it was found in bad_features2 |
@@ -692,17 +672,17 @@ xfs_mountfs( | |||
692 | if (xfs_sb_version_hasattr2(&mp->m_sb) && | 672 | if (xfs_sb_version_hasattr2(&mp->m_sb) && |
693 | (mp->m_flags & XFS_MOUNT_NOATTR2)) { | 673 | (mp->m_flags & XFS_MOUNT_NOATTR2)) { |
694 | xfs_sb_version_removeattr2(&mp->m_sb); | 674 | xfs_sb_version_removeattr2(&mp->m_sb); |
695 | mp->m_update_flags |= XFS_SB_FEATURES2; | 675 | mp->m_update_sb = true; |
696 | 676 | ||
697 | /* update sb_versionnum for the clearing of the morebits */ | 677 | /* update sb_versionnum for the clearing of the morebits */ |
698 | if (!sbp->sb_features2) | 678 | if (!sbp->sb_features2) |
699 | mp->m_update_flags |= XFS_SB_VERSIONNUM; | 679 | mp->m_update_sb = true; |
700 | } | 680 | } |
701 | 681 | ||
702 | /* always use v2 inodes by default now */ | 682 | /* always use v2 inodes by default now */ |
703 | if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) { | 683 | if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) { |
704 | mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT; | 684 | mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT; |
705 | mp->m_update_flags |= XFS_SB_VERSIONNUM; | 685 | mp->m_update_sb = true; |
706 | } | 686 | } |
707 | 687 | ||
708 | /* | 688 | /* |
@@ -895,8 +875,8 @@ xfs_mountfs( | |||
895 | * the next remount into writeable mode. Otherwise we would never | 875 | * the next remount into writeable mode. Otherwise we would never |
896 | * perform the update e.g. for the root filesystem. | 876 | * perform the update e.g. for the root filesystem. |
897 | */ | 877 | */ |
898 | if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { | 878 | if (mp->m_update_sb && !(mp->m_flags & XFS_MOUNT_RDONLY)) { |
899 | error = xfs_mount_log_sb(mp, mp->m_update_flags); | 879 | error = xfs_sync_sb(mp, false); |
900 | if (error) { | 880 | if (error) { |
901 | xfs_warn(mp, "failed to write sb changes"); | 881 | xfs_warn(mp, "failed to write sb changes"); |
902 | goto out_rtunmount; | 882 | goto out_rtunmount; |
@@ -1103,9 +1083,6 @@ xfs_fs_writable( | |||
1103 | int | 1083 | int |
1104 | xfs_log_sbcount(xfs_mount_t *mp) | 1084 | xfs_log_sbcount(xfs_mount_t *mp) |
1105 | { | 1085 | { |
1106 | xfs_trans_t *tp; | ||
1107 | int error; | ||
1108 | |||
1109 | /* allow this to proceed during the freeze sequence... */ | 1086 | /* allow this to proceed during the freeze sequence... */ |
1110 | if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) | 1087 | if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) |
1111 | return 0; | 1088 | return 0; |
@@ -1119,17 +1096,7 @@ xfs_log_sbcount(xfs_mount_t *mp) | |||
1119 | if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) | 1096 | if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) |
1120 | return 0; | 1097 | return 0; |
1121 | 1098 | ||
1122 | tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP); | 1099 | return xfs_sync_sb(mp, true); |
1123 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); | ||
1124 | if (error) { | ||
1125 | xfs_trans_cancel(tp, 0); | ||
1126 | return error; | ||
1127 | } | ||
1128 | |||
1129 | xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); | ||
1130 | xfs_trans_set_sync(tp); | ||
1131 | error = xfs_trans_commit(tp, 0); | ||
1132 | return error; | ||
1133 | } | 1100 | } |
1134 | 1101 | ||
1135 | /* | 1102 | /* |
@@ -1423,34 +1390,6 @@ xfs_freesb( | |||
1423 | } | 1390 | } |
1424 | 1391 | ||
1425 | /* | 1392 | /* |
1426 | * Used to log changes to the superblock unit and width fields which could | ||
1427 | * be altered by the mount options, as well as any potential sb_features2 | ||
1428 | * fixup. Only the first superblock is updated. | ||
1429 | */ | ||
1430 | int | ||
1431 | xfs_mount_log_sb( | ||
1432 | xfs_mount_t *mp, | ||
1433 | __int64_t fields) | ||
1434 | { | ||
1435 | xfs_trans_t *tp; | ||
1436 | int error; | ||
1437 | |||
1438 | ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID | | ||
1439 | XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2 | | ||
1440 | XFS_SB_VERSIONNUM)); | ||
1441 | |||
1442 | tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); | ||
1443 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); | ||
1444 | if (error) { | ||
1445 | xfs_trans_cancel(tp, 0); | ||
1446 | return error; | ||
1447 | } | ||
1448 | xfs_mod_sb(tp, fields); | ||
1449 | error = xfs_trans_commit(tp, 0); | ||
1450 | return error; | ||
1451 | } | ||
1452 | |||
1453 | /* | ||
1454 | * If the underlying (data/log/rt) device is readonly, there are some | 1393 | * If the underlying (data/log/rt) device is readonly, there are some |
1455 | * operations that cannot proceed. | 1394 | * operations that cannot proceed. |
1456 | */ | 1395 | */ |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 22ccf69d4d3c..a5b2ff822653 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -162,8 +162,7 @@ typedef struct xfs_mount { | |||
162 | struct delayed_work m_reclaim_work; /* background inode reclaim */ | 162 | struct delayed_work m_reclaim_work; /* background inode reclaim */ |
163 | struct delayed_work m_eofblocks_work; /* background eof blocks | 163 | struct delayed_work m_eofblocks_work; /* background eof blocks |
164 | trimming */ | 164 | trimming */ |
165 | __int64_t m_update_flags; /* sb flags we need to update | 165 | bool m_update_sb; /* sb needs update in mount */ |
166 | on the next remount,rw */ | ||
167 | int64_t m_low_space[XFS_LOWSP_MAX]; | 166 | int64_t m_low_space[XFS_LOWSP_MAX]; |
168 | /* low free space thresholds */ | 167 | /* low free space thresholds */ |
169 | struct xfs_kobj m_kobj; | 168 | struct xfs_kobj m_kobj; |
@@ -378,7 +377,7 @@ extern void xfs_unmountfs(xfs_mount_t *); | |||
378 | extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); | 377 | extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); |
379 | extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, | 378 | extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, |
380 | uint, int); | 379 | uint, int); |
381 | extern int xfs_mount_log_sb(xfs_mount_t *, __int64_t); | 380 | extern int xfs_mount_log_sb(xfs_mount_t *); |
382 | extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); | 381 | extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); |
383 | extern int xfs_readsb(xfs_mount_t *, int); | 382 | extern int xfs_readsb(xfs_mount_t *, int); |
384 | extern void xfs_freesb(xfs_mount_t *); | 383 | extern void xfs_freesb(xfs_mount_t *); |
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 79fb19dd9c83..53cc2aaf8d2b 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -430,6 +430,7 @@ struct xfs_qm_isolate { | |||
430 | static enum lru_status | 430 | static enum lru_status |
431 | xfs_qm_dquot_isolate( | 431 | xfs_qm_dquot_isolate( |
432 | struct list_head *item, | 432 | struct list_head *item, |
433 | struct list_lru_one *lru, | ||
433 | spinlock_t *lru_lock, | 434 | spinlock_t *lru_lock, |
434 | void *arg) | 435 | void *arg) |
435 | __releases(lru_lock) __acquires(lru_lock) | 436 | __releases(lru_lock) __acquires(lru_lock) |
@@ -450,7 +451,7 @@ xfs_qm_dquot_isolate( | |||
450 | XFS_STATS_INC(xs_qm_dqwants); | 451 | XFS_STATS_INC(xs_qm_dqwants); |
451 | 452 | ||
452 | trace_xfs_dqreclaim_want(dqp); | 453 | trace_xfs_dqreclaim_want(dqp); |
453 | list_del_init(&dqp->q_lru); | 454 | list_lru_isolate(lru, &dqp->q_lru); |
454 | XFS_STATS_DEC(xs_qm_dquot_unused); | 455 | XFS_STATS_DEC(xs_qm_dquot_unused); |
455 | return LRU_REMOVED; | 456 | return LRU_REMOVED; |
456 | } | 457 | } |
@@ -494,7 +495,7 @@ xfs_qm_dquot_isolate( | |||
494 | xfs_dqunlock(dqp); | 495 | xfs_dqunlock(dqp); |
495 | 496 | ||
496 | ASSERT(dqp->q_nrefs == 0); | 497 | ASSERT(dqp->q_nrefs == 0); |
497 | list_move_tail(&dqp->q_lru, &isol->dispose); | 498 | list_lru_isolate_move(lru, &dqp->q_lru, &isol->dispose); |
498 | XFS_STATS_DEC(xs_qm_dquot_unused); | 499 | XFS_STATS_DEC(xs_qm_dquot_unused); |
499 | trace_xfs_dqreclaim_done(dqp); | 500 | trace_xfs_dqreclaim_done(dqp); |
500 | XFS_STATS_INC(xs_qm_dqreclaims); | 501 | XFS_STATS_INC(xs_qm_dqreclaims); |
@@ -523,7 +524,6 @@ xfs_qm_shrink_scan( | |||
523 | struct xfs_qm_isolate isol; | 524 | struct xfs_qm_isolate isol; |
524 | unsigned long freed; | 525 | unsigned long freed; |
525 | int error; | 526 | int error; |
526 | unsigned long nr_to_scan = sc->nr_to_scan; | ||
527 | 527 | ||
528 | if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) | 528 | if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) |
529 | return 0; | 529 | return 0; |
@@ -531,8 +531,8 @@ xfs_qm_shrink_scan( | |||
531 | INIT_LIST_HEAD(&isol.buffers); | 531 | INIT_LIST_HEAD(&isol.buffers); |
532 | INIT_LIST_HEAD(&isol.dispose); | 532 | INIT_LIST_HEAD(&isol.dispose); |
533 | 533 | ||
534 | freed = list_lru_walk_node(&qi->qi_lru, sc->nid, xfs_qm_dquot_isolate, &isol, | 534 | freed = list_lru_shrink_walk(&qi->qi_lru, sc, |
535 | &nr_to_scan); | 535 | xfs_qm_dquot_isolate, &isol); |
536 | 536 | ||
537 | error = xfs_buf_delwri_submit(&isol.buffers); | 537 | error = xfs_buf_delwri_submit(&isol.buffers); |
538 | if (error) | 538 | if (error) |
@@ -557,7 +557,7 @@ xfs_qm_shrink_count( | |||
557 | struct xfs_quotainfo *qi = container_of(shrink, | 557 | struct xfs_quotainfo *qi = container_of(shrink, |
558 | struct xfs_quotainfo, qi_shrinker); | 558 | struct xfs_quotainfo, qi_shrinker); |
559 | 559 | ||
560 | return list_lru_count_node(&qi->qi_lru, sc->nid); | 560 | return list_lru_shrink_count(&qi->qi_lru, sc); |
561 | } | 561 | } |
562 | 562 | ||
563 | /* | 563 | /* |
@@ -714,7 +714,6 @@ STATIC int | |||
714 | xfs_qm_qino_alloc( | 714 | xfs_qm_qino_alloc( |
715 | xfs_mount_t *mp, | 715 | xfs_mount_t *mp, |
716 | xfs_inode_t **ip, | 716 | xfs_inode_t **ip, |
717 | __int64_t sbfields, | ||
718 | uint flags) | 717 | uint flags) |
719 | { | 718 | { |
720 | xfs_trans_t *tp; | 719 | xfs_trans_t *tp; |
@@ -777,11 +776,6 @@ xfs_qm_qino_alloc( | |||
777 | spin_lock(&mp->m_sb_lock); | 776 | spin_lock(&mp->m_sb_lock); |
778 | if (flags & XFS_QMOPT_SBVERSION) { | 777 | if (flags & XFS_QMOPT_SBVERSION) { |
779 | ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); | 778 | ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); |
780 | ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | | ||
781 | XFS_SB_GQUOTINO | XFS_SB_PQUOTINO | XFS_SB_QFLAGS)) == | ||
782 | (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | | ||
783 | XFS_SB_GQUOTINO | XFS_SB_PQUOTINO | | ||
784 | XFS_SB_QFLAGS)); | ||
785 | 779 | ||
786 | xfs_sb_version_addquota(&mp->m_sb); | 780 | xfs_sb_version_addquota(&mp->m_sb); |
787 | mp->m_sb.sb_uquotino = NULLFSINO; | 781 | mp->m_sb.sb_uquotino = NULLFSINO; |
@@ -798,7 +792,7 @@ xfs_qm_qino_alloc( | |||
798 | else | 792 | else |
799 | mp->m_sb.sb_pquotino = (*ip)->i_ino; | 793 | mp->m_sb.sb_pquotino = (*ip)->i_ino; |
800 | spin_unlock(&mp->m_sb_lock); | 794 | spin_unlock(&mp->m_sb_lock); |
801 | xfs_mod_sb(tp, sbfields); | 795 | xfs_log_sb(tp); |
802 | 796 | ||
803 | if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { | 797 | if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { |
804 | xfs_alert(mp, "%s failed (error %d)!", __func__, error); | 798 | xfs_alert(mp, "%s failed (error %d)!", __func__, error); |
@@ -1451,7 +1445,7 @@ xfs_qm_mount_quotas( | |||
1451 | spin_unlock(&mp->m_sb_lock); | 1445 | spin_unlock(&mp->m_sb_lock); |
1452 | 1446 | ||
1453 | if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { | 1447 | if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { |
1454 | if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { | 1448 | if (xfs_sync_sb(mp, false)) { |
1455 | /* | 1449 | /* |
1456 | * We could only have been turning quotas off. | 1450 | * We could only have been turning quotas off. |
1457 | * We aren't in very good shape actually because | 1451 | * We aren't in very good shape actually because |
@@ -1482,7 +1476,6 @@ xfs_qm_init_quotainos( | |||
1482 | struct xfs_inode *gip = NULL; | 1476 | struct xfs_inode *gip = NULL; |
1483 | struct xfs_inode *pip = NULL; | 1477 | struct xfs_inode *pip = NULL; |
1484 | int error; | 1478 | int error; |
1485 | __int64_t sbflags = 0; | ||
1486 | uint flags = 0; | 1479 | uint flags = 0; |
1487 | 1480 | ||
1488 | ASSERT(mp->m_quotainfo); | 1481 | ASSERT(mp->m_quotainfo); |
@@ -1517,9 +1510,6 @@ xfs_qm_init_quotainos( | |||
1517 | } | 1510 | } |
1518 | } else { | 1511 | } else { |
1519 | flags |= XFS_QMOPT_SBVERSION; | 1512 | flags |= XFS_QMOPT_SBVERSION; |
1520 | sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | | ||
1521 | XFS_SB_GQUOTINO | XFS_SB_PQUOTINO | | ||
1522 | XFS_SB_QFLAGS); | ||
1523 | } | 1513 | } |
1524 | 1514 | ||
1525 | /* | 1515 | /* |
@@ -1530,7 +1520,6 @@ xfs_qm_init_quotainos( | |||
1530 | */ | 1520 | */ |
1531 | if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { | 1521 | if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { |
1532 | error = xfs_qm_qino_alloc(mp, &uip, | 1522 | error = xfs_qm_qino_alloc(mp, &uip, |
1533 | sbflags | XFS_SB_UQUOTINO, | ||
1534 | flags | XFS_QMOPT_UQUOTA); | 1523 | flags | XFS_QMOPT_UQUOTA); |
1535 | if (error) | 1524 | if (error) |
1536 | goto error_rele; | 1525 | goto error_rele; |
@@ -1539,7 +1528,6 @@ xfs_qm_init_quotainos( | |||
1539 | } | 1528 | } |
1540 | if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) { | 1529 | if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) { |
1541 | error = xfs_qm_qino_alloc(mp, &gip, | 1530 | error = xfs_qm_qino_alloc(mp, &gip, |
1542 | sbflags | XFS_SB_GQUOTINO, | ||
1543 | flags | XFS_QMOPT_GQUOTA); | 1531 | flags | XFS_QMOPT_GQUOTA); |
1544 | if (error) | 1532 | if (error) |
1545 | goto error_rele; | 1533 | goto error_rele; |
@@ -1548,7 +1536,6 @@ xfs_qm_init_quotainos( | |||
1548 | } | 1536 | } |
1549 | if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) { | 1537 | if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) { |
1550 | error = xfs_qm_qino_alloc(mp, &pip, | 1538 | error = xfs_qm_qino_alloc(mp, &pip, |
1551 | sbflags | XFS_SB_PQUOTINO, | ||
1552 | flags | XFS_QMOPT_PQUOTA); | 1539 | flags | XFS_QMOPT_PQUOTA); |
1553 | if (error) | 1540 | if (error) |
1554 | goto error_rele; | 1541 | goto error_rele; |
@@ -1587,32 +1574,6 @@ xfs_qm_dqfree_one( | |||
1587 | xfs_qm_dqdestroy(dqp); | 1574 | xfs_qm_dqdestroy(dqp); |
1588 | } | 1575 | } |
1589 | 1576 | ||
1590 | /* | ||
1591 | * Start a transaction and write the incore superblock changes to | ||
1592 | * disk. flags parameter indicates which fields have changed. | ||
1593 | */ | ||
1594 | int | ||
1595 | xfs_qm_write_sb_changes( | ||
1596 | xfs_mount_t *mp, | ||
1597 | __int64_t flags) | ||
1598 | { | ||
1599 | xfs_trans_t *tp; | ||
1600 | int error; | ||
1601 | |||
1602 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); | ||
1603 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0); | ||
1604 | if (error) { | ||
1605 | xfs_trans_cancel(tp, 0); | ||
1606 | return error; | ||
1607 | } | ||
1608 | |||
1609 | xfs_mod_sb(tp, flags); | ||
1610 | error = xfs_trans_commit(tp, 0); | ||
1611 | |||
1612 | return error; | ||
1613 | } | ||
1614 | |||
1615 | |||
1616 | /* --------------- utility functions for vnodeops ---------------- */ | 1577 | /* --------------- utility functions for vnodeops ---------------- */ |
1617 | 1578 | ||
1618 | 1579 | ||
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 41f6c0b9d51c..0d4d3590cf85 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h | |||
@@ -157,7 +157,6 @@ struct xfs_dquot_acct { | |||
157 | #define XFS_QM_RTBWARNLIMIT 5 | 157 | #define XFS_QM_RTBWARNLIMIT 5 |
158 | 158 | ||
159 | extern void xfs_qm_destroy_quotainfo(struct xfs_mount *); | 159 | extern void xfs_qm_destroy_quotainfo(struct xfs_mount *); |
160 | extern int xfs_qm_write_sb_changes(struct xfs_mount *, __int64_t); | ||
161 | 160 | ||
162 | /* dquot stuff */ | 161 | /* dquot stuff */ |
163 | extern void xfs_qm_dqpurge_all(struct xfs_mount *, uint); | 162 | extern void xfs_qm_dqpurge_all(struct xfs_mount *, uint); |
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index cb6168ec92c9..9b965db45800 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c | |||
@@ -91,8 +91,7 @@ xfs_qm_scall_quotaoff( | |||
91 | mutex_unlock(&q->qi_quotaofflock); | 91 | mutex_unlock(&q->qi_quotaofflock); |
92 | 92 | ||
93 | /* XXX what to do if error ? Revert back to old vals incore ? */ | 93 | /* XXX what to do if error ? Revert back to old vals incore ? */ |
94 | error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); | 94 | return xfs_sync_sb(mp, false); |
95 | return error; | ||
96 | } | 95 | } |
97 | 96 | ||
98 | dqtype = 0; | 97 | dqtype = 0; |
@@ -313,7 +312,6 @@ xfs_qm_scall_quotaon( | |||
313 | { | 312 | { |
314 | int error; | 313 | int error; |
315 | uint qf; | 314 | uint qf; |
316 | __int64_t sbflags; | ||
317 | 315 | ||
318 | flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); | 316 | flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); |
319 | /* | 317 | /* |
@@ -321,30 +319,22 @@ xfs_qm_scall_quotaon( | |||
321 | */ | 319 | */ |
322 | flags &= ~(XFS_ALL_QUOTA_ACCT); | 320 | flags &= ~(XFS_ALL_QUOTA_ACCT); |
323 | 321 | ||
324 | sbflags = 0; | ||
325 | |||
326 | if (flags == 0) { | 322 | if (flags == 0) { |
327 | xfs_debug(mp, "%s: zero flags, m_qflags=%x", | 323 | xfs_debug(mp, "%s: zero flags, m_qflags=%x", |
328 | __func__, mp->m_qflags); | 324 | __func__, mp->m_qflags); |
329 | return -EINVAL; | 325 | return -EINVAL; |
330 | } | 326 | } |
331 | 327 | ||
332 | /* No fs can turn on quotas with a delayed effect */ | ||
333 | ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0); | ||
334 | |||
335 | /* | 328 | /* |
336 | * Can't enforce without accounting. We check the superblock | 329 | * Can't enforce without accounting. We check the superblock |
337 | * qflags here instead of m_qflags because rootfs can have | 330 | * qflags here instead of m_qflags because rootfs can have |
338 | * quota acct on ondisk without m_qflags' knowing. | 331 | * quota acct on ondisk without m_qflags' knowing. |
339 | */ | 332 | */ |
340 | if (((flags & XFS_UQUOTA_ACCT) == 0 && | 333 | if (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && |
341 | (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && | ||
342 | (flags & XFS_UQUOTA_ENFD)) || | 334 | (flags & XFS_UQUOTA_ENFD)) || |
343 | ((flags & XFS_GQUOTA_ACCT) == 0 && | 335 | ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && |
344 | (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && | ||
345 | (flags & XFS_GQUOTA_ENFD)) || | 336 | (flags & XFS_GQUOTA_ENFD)) || |
346 | ((flags & XFS_PQUOTA_ACCT) == 0 && | 337 | ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && |
347 | (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && | ||
348 | (flags & XFS_PQUOTA_ENFD))) { | 338 | (flags & XFS_PQUOTA_ENFD))) { |
349 | xfs_debug(mp, | 339 | xfs_debug(mp, |
350 | "%s: Can't enforce without acct, flags=%x sbflags=%x", | 340 | "%s: Can't enforce without acct, flags=%x sbflags=%x", |
@@ -369,11 +359,11 @@ xfs_qm_scall_quotaon( | |||
369 | /* | 359 | /* |
370 | * There's nothing to change if it's the same. | 360 | * There's nothing to change if it's the same. |
371 | */ | 361 | */ |
372 | if ((qf & flags) == flags && sbflags == 0) | 362 | if ((qf & flags) == flags) |
373 | return -EEXIST; | 363 | return -EEXIST; |
374 | sbflags |= XFS_SB_QFLAGS; | ||
375 | 364 | ||
376 | if ((error = xfs_qm_write_sb_changes(mp, sbflags))) | 365 | error = xfs_sync_sb(mp, false); |
366 | if (error) | ||
377 | return error; | 367 | return error; |
378 | /* | 368 | /* |
379 | * If we aren't trying to switch on quota enforcement, we are done. | 369 | * If we aren't trying to switch on quota enforcement, we are done. |
@@ -383,8 +373,7 @@ xfs_qm_scall_quotaon( | |||
383 | ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) != | 373 | ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) != |
384 | (mp->m_qflags & XFS_PQUOTA_ACCT)) || | 374 | (mp->m_qflags & XFS_PQUOTA_ACCT)) || |
385 | ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) != | 375 | ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) != |
386 | (mp->m_qflags & XFS_GQUOTA_ACCT)) || | 376 | (mp->m_qflags & XFS_GQUOTA_ACCT))) |
387 | (flags & XFS_ALL_QUOTA_ENFD) == 0) | ||
388 | return 0; | 377 | return 0; |
389 | 378 | ||
390 | if (! XFS_IS_QUOTA_RUNNING(mp)) | 379 | if (! XFS_IS_QUOTA_RUNNING(mp)) |
@@ -421,20 +410,12 @@ xfs_qm_scall_getqstat( | |||
421 | memset(out, 0, sizeof(fs_quota_stat_t)); | 410 | memset(out, 0, sizeof(fs_quota_stat_t)); |
422 | 411 | ||
423 | out->qs_version = FS_QSTAT_VERSION; | 412 | out->qs_version = FS_QSTAT_VERSION; |
424 | if (!xfs_sb_version_hasquota(&mp->m_sb)) { | ||
425 | out->qs_uquota.qfs_ino = NULLFSINO; | ||
426 | out->qs_gquota.qfs_ino = NULLFSINO; | ||
427 | return 0; | ||
428 | } | ||
429 | |||
430 | out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & | 413 | out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & |
431 | (XFS_ALL_QUOTA_ACCT| | 414 | (XFS_ALL_QUOTA_ACCT| |
432 | XFS_ALL_QUOTA_ENFD)); | 415 | XFS_ALL_QUOTA_ENFD)); |
433 | if (q) { | 416 | uip = q->qi_uquotaip; |
434 | uip = q->qi_uquotaip; | 417 | gip = q->qi_gquotaip; |
435 | gip = q->qi_gquotaip; | 418 | pip = q->qi_pquotaip; |
436 | pip = q->qi_pquotaip; | ||
437 | } | ||
438 | if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { | 419 | if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { |
439 | if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, | 420 | if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, |
440 | 0, 0, &uip) == 0) | 421 | 0, 0, &uip) == 0) |
@@ -480,14 +461,13 @@ xfs_qm_scall_getqstat( | |||
480 | if (temppqip) | 461 | if (temppqip) |
481 | IRELE(pip); | 462 | IRELE(pip); |
482 | } | 463 | } |
483 | if (q) { | 464 | out->qs_incoredqs = q->qi_dquots; |
484 | out->qs_incoredqs = q->qi_dquots; | 465 | out->qs_btimelimit = q->qi_btimelimit; |
485 | out->qs_btimelimit = q->qi_btimelimit; | 466 | out->qs_itimelimit = q->qi_itimelimit; |
486 | out->qs_itimelimit = q->qi_itimelimit; | 467 | out->qs_rtbtimelimit = q->qi_rtbtimelimit; |
487 | out->qs_rtbtimelimit = q->qi_rtbtimelimit; | 468 | out->qs_bwarnlimit = q->qi_bwarnlimit; |
488 | out->qs_bwarnlimit = q->qi_bwarnlimit; | 469 | out->qs_iwarnlimit = q->qi_iwarnlimit; |
489 | out->qs_iwarnlimit = q->qi_iwarnlimit; | 470 | |
490 | } | ||
491 | return 0; | 471 | return 0; |
492 | } | 472 | } |
493 | 473 | ||
@@ -508,13 +488,6 @@ xfs_qm_scall_getqstatv( | |||
508 | bool tempgqip = false; | 488 | bool tempgqip = false; |
509 | bool temppqip = false; | 489 | bool temppqip = false; |
510 | 490 | ||
511 | if (!xfs_sb_version_hasquota(&mp->m_sb)) { | ||
512 | out->qs_uquota.qfs_ino = NULLFSINO; | ||
513 | out->qs_gquota.qfs_ino = NULLFSINO; | ||
514 | out->qs_pquota.qfs_ino = NULLFSINO; | ||
515 | return 0; | ||
516 | } | ||
517 | |||
518 | out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & | 491 | out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & |
519 | (XFS_ALL_QUOTA_ACCT| | 492 | (XFS_ALL_QUOTA_ACCT| |
520 | XFS_ALL_QUOTA_ENFD)); | 493 | XFS_ALL_QUOTA_ENFD)); |
@@ -522,11 +495,9 @@ xfs_qm_scall_getqstatv( | |||
522 | out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; | 495 | out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; |
523 | out->qs_pquota.qfs_ino = mp->m_sb.sb_pquotino; | 496 | out->qs_pquota.qfs_ino = mp->m_sb.sb_pquotino; |
524 | 497 | ||
525 | if (q) { | 498 | uip = q->qi_uquotaip; |
526 | uip = q->qi_uquotaip; | 499 | gip = q->qi_gquotaip; |
527 | gip = q->qi_gquotaip; | 500 | pip = q->qi_pquotaip; |
528 | pip = q->qi_pquotaip; | ||
529 | } | ||
530 | if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { | 501 | if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { |
531 | if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, | 502 | if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, |
532 | 0, 0, &uip) == 0) | 503 | 0, 0, &uip) == 0) |
@@ -561,14 +532,13 @@ xfs_qm_scall_getqstatv( | |||
561 | if (temppqip) | 532 | if (temppqip) |
562 | IRELE(pip); | 533 | IRELE(pip); |
563 | } | 534 | } |
564 | if (q) { | 535 | out->qs_incoredqs = q->qi_dquots; |
565 | out->qs_incoredqs = q->qi_dquots; | 536 | out->qs_btimelimit = q->qi_btimelimit; |
566 | out->qs_btimelimit = q->qi_btimelimit; | 537 | out->qs_itimelimit = q->qi_itimelimit; |
567 | out->qs_itimelimit = q->qi_itimelimit; | 538 | out->qs_rtbtimelimit = q->qi_rtbtimelimit; |
568 | out->qs_rtbtimelimit = q->qi_rtbtimelimit; | 539 | out->qs_bwarnlimit = q->qi_bwarnlimit; |
569 | out->qs_bwarnlimit = q->qi_bwarnlimit; | 540 | out->qs_iwarnlimit = q->qi_iwarnlimit; |
570 | out->qs_iwarnlimit = q->qi_iwarnlimit; | 541 | |
571 | } | ||
572 | return 0; | 542 | return 0; |
573 | } | 543 | } |
574 | 544 | ||
@@ -800,7 +770,7 @@ xfs_qm_log_quotaoff( | |||
800 | mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; | 770 | mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; |
801 | spin_unlock(&mp->m_sb_lock); | 771 | spin_unlock(&mp->m_sb_lock); |
802 | 772 | ||
803 | xfs_mod_sb(tp, XFS_SB_QFLAGS); | 773 | xfs_log_sb(tp); |
804 | 774 | ||
805 | /* | 775 | /* |
806 | * We have to make sure that the transaction is secure on disk before we | 776 | * We have to make sure that the transaction is secure on disk before we |
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 801a84c1cdc3..6923905ab33d 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c | |||
@@ -64,19 +64,10 @@ xfs_fs_get_xstatev( | |||
64 | return xfs_qm_scall_getqstatv(mp, fqs); | 64 | return xfs_qm_scall_getqstatv(mp, fqs); |
65 | } | 65 | } |
66 | 66 | ||
67 | STATIC int | 67 | static unsigned int |
68 | xfs_fs_set_xstate( | 68 | xfs_quota_flags(unsigned int uflags) |
69 | struct super_block *sb, | ||
70 | unsigned int uflags, | ||
71 | int op) | ||
72 | { | 69 | { |
73 | struct xfs_mount *mp = XFS_M(sb); | 70 | unsigned int flags = 0; |
74 | unsigned int flags = 0; | ||
75 | |||
76 | if (sb->s_flags & MS_RDONLY) | ||
77 | return -EROFS; | ||
78 | if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) | ||
79 | return -ENOSYS; | ||
80 | 71 | ||
81 | if (uflags & FS_QUOTA_UDQ_ACCT) | 72 | if (uflags & FS_QUOTA_UDQ_ACCT) |
82 | flags |= XFS_UQUOTA_ACCT; | 73 | flags |= XFS_UQUOTA_ACCT; |
@@ -91,16 +82,39 @@ xfs_fs_set_xstate( | |||
91 | if (uflags & FS_QUOTA_PDQ_ENFD) | 82 | if (uflags & FS_QUOTA_PDQ_ENFD) |
92 | flags |= XFS_PQUOTA_ENFD; | 83 | flags |= XFS_PQUOTA_ENFD; |
93 | 84 | ||
94 | switch (op) { | 85 | return flags; |
95 | case Q_XQUOTAON: | 86 | } |
96 | return xfs_qm_scall_quotaon(mp, flags); | 87 | |
97 | case Q_XQUOTAOFF: | 88 | STATIC int |
98 | if (!XFS_IS_QUOTA_ON(mp)) | 89 | xfs_quota_enable( |
99 | return -EINVAL; | 90 | struct super_block *sb, |
100 | return xfs_qm_scall_quotaoff(mp, flags); | 91 | unsigned int uflags) |
101 | } | 92 | { |
93 | struct xfs_mount *mp = XFS_M(sb); | ||
94 | |||
95 | if (sb->s_flags & MS_RDONLY) | ||
96 | return -EROFS; | ||
97 | if (!XFS_IS_QUOTA_RUNNING(mp)) | ||
98 | return -ENOSYS; | ||
99 | |||
100 | return xfs_qm_scall_quotaon(mp, xfs_quota_flags(uflags)); | ||
101 | } | ||
102 | |||
103 | STATIC int | ||
104 | xfs_quota_disable( | ||
105 | struct super_block *sb, | ||
106 | unsigned int uflags) | ||
107 | { | ||
108 | struct xfs_mount *mp = XFS_M(sb); | ||
109 | |||
110 | if (sb->s_flags & MS_RDONLY) | ||
111 | return -EROFS; | ||
112 | if (!XFS_IS_QUOTA_RUNNING(mp)) | ||
113 | return -ENOSYS; | ||
114 | if (!XFS_IS_QUOTA_ON(mp)) | ||
115 | return -EINVAL; | ||
102 | 116 | ||
103 | return -EINVAL; | 117 | return xfs_qm_scall_quotaoff(mp, xfs_quota_flags(uflags)); |
104 | } | 118 | } |
105 | 119 | ||
106 | STATIC int | 120 | STATIC int |
@@ -166,7 +180,8 @@ xfs_fs_set_dqblk( | |||
166 | const struct quotactl_ops xfs_quotactl_operations = { | 180 | const struct quotactl_ops xfs_quotactl_operations = { |
167 | .get_xstatev = xfs_fs_get_xstatev, | 181 | .get_xstatev = xfs_fs_get_xstatev, |
168 | .get_xstate = xfs_fs_get_xstate, | 182 | .get_xstate = xfs_fs_get_xstate, |
169 | .set_xstate = xfs_fs_set_xstate, | 183 | .quota_enable = xfs_quota_enable, |
184 | .quota_disable = xfs_quota_disable, | ||
170 | .rm_xquota = xfs_fs_rm_xquota, | 185 | .rm_xquota = xfs_fs_rm_xquota, |
171 | .get_dqblk = xfs_fs_get_dqblk, | 186 | .get_dqblk = xfs_fs_get_dqblk, |
172 | .set_dqblk = xfs_fs_set_dqblk, | 187 | .set_dqblk = xfs_fs_set_dqblk, |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 19cbda196369..8fcc4ccc5c79 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -685,7 +685,7 @@ xfs_blkdev_get( | |||
685 | mp); | 685 | mp); |
686 | if (IS_ERR(*bdevp)) { | 686 | if (IS_ERR(*bdevp)) { |
687 | error = PTR_ERR(*bdevp); | 687 | error = PTR_ERR(*bdevp); |
688 | xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); | 688 | xfs_warn(mp, "Invalid device [%s], error=%d", name, error); |
689 | } | 689 | } |
690 | 690 | ||
691 | return error; | 691 | return error; |
@@ -1111,6 +1111,11 @@ xfs_fs_statfs( | |||
1111 | statp->f_files, | 1111 | statp->f_files, |
1112 | mp->m_maxicount); | 1112 | mp->m_maxicount); |
1113 | 1113 | ||
1114 | /* If sb_icount overshot maxicount, report actual allocation */ | ||
1115 | statp->f_files = max_t(typeof(statp->f_files), | ||
1116 | statp->f_files, | ||
1117 | sbp->sb_icount); | ||
1118 | |||
1114 | /* make sure statp->f_ffree does not underflow */ | 1119 | /* make sure statp->f_ffree does not underflow */ |
1115 | ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); | 1120 | ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); |
1116 | statp->f_ffree = max_t(__int64_t, ffree, 0); | 1121 | statp->f_ffree = max_t(__int64_t, ffree, 0); |
@@ -1257,13 +1262,13 @@ xfs_fs_remount( | |||
1257 | * If this is the first remount to writeable state we | 1262 | * If this is the first remount to writeable state we |
1258 | * might have some superblock changes to update. | 1263 | * might have some superblock changes to update. |
1259 | */ | 1264 | */ |
1260 | if (mp->m_update_flags) { | 1265 | if (mp->m_update_sb) { |
1261 | error = xfs_mount_log_sb(mp, mp->m_update_flags); | 1266 | error = xfs_sync_sb(mp, false); |
1262 | if (error) { | 1267 | if (error) { |
1263 | xfs_warn(mp, "failed to write sb changes"); | 1268 | xfs_warn(mp, "failed to write sb changes"); |
1264 | return error; | 1269 | return error; |
1265 | } | 1270 | } |
1266 | mp->m_update_flags = 0; | 1271 | mp->m_update_sb = false; |
1267 | } | 1272 | } |
1268 | 1273 | ||
1269 | /* | 1274 | /* |
@@ -1293,8 +1298,9 @@ xfs_fs_remount( | |||
1293 | 1298 | ||
1294 | /* | 1299 | /* |
1295 | * Second stage of a freeze. The data is already frozen so we only | 1300 | * Second stage of a freeze. The data is already frozen so we only |
1296 | * need to take care of the metadata. Once that's done write a dummy | 1301 | * need to take care of the metadata. Once that's done sync the superblock |
1297 | * record to dirty the log in case of a crash while frozen. | 1302 | * to the log to dirty it in case of a crash while frozen. This ensures that we |
1303 | * will recover the unlinked inode lists on the next mount. | ||
1298 | */ | 1304 | */ |
1299 | STATIC int | 1305 | STATIC int |
1300 | xfs_fs_freeze( | 1306 | xfs_fs_freeze( |
@@ -1304,7 +1310,7 @@ xfs_fs_freeze( | |||
1304 | 1310 | ||
1305 | xfs_save_resvblks(mp); | 1311 | xfs_save_resvblks(mp); |
1306 | xfs_quiesce_attr(mp); | 1312 | xfs_quiesce_attr(mp); |
1307 | return xfs_fs_log_dummy(mp); | 1313 | return xfs_sync_sb(mp, true); |
1308 | } | 1314 | } |
1309 | 1315 | ||
1310 | STATIC int | 1316 | STATIC int |
@@ -1531,7 +1537,7 @@ xfs_fs_mount( | |||
1531 | static long | 1537 | static long |
1532 | xfs_fs_nr_cached_objects( | 1538 | xfs_fs_nr_cached_objects( |
1533 | struct super_block *sb, | 1539 | struct super_block *sb, |
1534 | int nid) | 1540 | struct shrink_control *sc) |
1535 | { | 1541 | { |
1536 | return xfs_reclaim_inodes_count(XFS_M(sb)); | 1542 | return xfs_reclaim_inodes_count(XFS_M(sb)); |
1537 | } | 1543 | } |
@@ -1539,10 +1545,9 @@ xfs_fs_nr_cached_objects( | |||
1539 | static long | 1545 | static long |
1540 | xfs_fs_free_cached_objects( | 1546 | xfs_fs_free_cached_objects( |
1541 | struct super_block *sb, | 1547 | struct super_block *sb, |
1542 | long nr_to_scan, | 1548 | struct shrink_control *sc) |
1543 | int nid) | ||
1544 | { | 1549 | { |
1545 | return xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); | 1550 | return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan); |
1546 | } | 1551 | } |
1547 | 1552 | ||
1548 | static const struct super_operations xfs_super_operations = { | 1553 | static const struct super_operations xfs_super_operations = { |
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 1743b9f8e23d..a0c8067cea6f 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c | |||
@@ -149,24 +149,6 @@ static struct ctl_table xfs_table[] = { | |||
149 | .extra2 = &xfs_params.inherit_noatim.max | 149 | .extra2 = &xfs_params.inherit_noatim.max |
150 | }, | 150 | }, |
151 | { | 151 | { |
152 | .procname = "xfsbufd_centisecs", | ||
153 | .data = &xfs_params.xfs_buf_timer.val, | ||
154 | .maxlen = sizeof(int), | ||
155 | .mode = 0644, | ||
156 | .proc_handler = proc_dointvec_minmax, | ||
157 | .extra1 = &xfs_params.xfs_buf_timer.min, | ||
158 | .extra2 = &xfs_params.xfs_buf_timer.max | ||
159 | }, | ||
160 | { | ||
161 | .procname = "age_buffer_centisecs", | ||
162 | .data = &xfs_params.xfs_buf_age.val, | ||
163 | .maxlen = sizeof(int), | ||
164 | .mode = 0644, | ||
165 | .proc_handler = proc_dointvec_minmax, | ||
166 | .extra1 = &xfs_params.xfs_buf_age.min, | ||
167 | .extra2 = &xfs_params.xfs_buf_age.max | ||
168 | }, | ||
169 | { | ||
170 | .procname = "inherit_nosymlinks", | 152 | .procname = "inherit_nosymlinks", |
171 | .data = &xfs_params.inherit_nosym.val, | 153 | .data = &xfs_params.inherit_nosym.val, |
172 | .maxlen = sizeof(int), | 154 | .maxlen = sizeof(int), |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index fa3135b9bf04..eb90cd59a0ec 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -472,6 +472,7 @@ xfs_trans_apply_sb_deltas( | |||
472 | whole = 1; | 472 | whole = 1; |
473 | } | 473 | } |
474 | 474 | ||
475 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); | ||
475 | if (whole) | 476 | if (whole) |
476 | /* | 477 | /* |
477 | * Log the whole thing, the fields are noncontiguous. | 478 | * Log the whole thing, the fields are noncontiguous. |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 0a4d4ab6d9a9..75798412859a 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -327,9 +327,10 @@ xfs_trans_read_buf_map( | |||
327 | return -EIO; | 327 | return -EIO; |
328 | } | 328 | } |
329 | 329 | ||
330 | if (tp) | 330 | if (tp) { |
331 | _xfs_trans_bjoin(tp, bp, 1); | 331 | _xfs_trans_bjoin(tp, bp, 1); |
332 | trace_xfs_trans_read_buf(bp->b_fspriv); | 332 | trace_xfs_trans_read_buf(bp->b_fspriv); |
333 | } | ||
333 | *bpp = bp; | 334 | *bpp = bp; |
334 | return 0; | 335 | return 0; |
335 | 336 | ||